utils

`lacuna.utils` ¶

Utility functions for data preprocessing and connectome preparation.

This module provides tools for working with neuroimaging datasets and preparing data for lesion network mapping analyses.

Key Components

GSP1000 Utilities:

- `create_connectome_batches`: Convert GSP1000 data to optimized HDF5 batches
- `validate_connectome_batches`: Verify integrity of batch files

Logging Utilities:

- `ConsoleLogger`: Consistent console logger for user-facing messages
- `log_section`, `log_info`, `log_success`, `log_warning`, `log_error`, `log_progress`: Convenience functions

Suggestion Utilities:

- `suggest_similar`: Find similar strings for error message suggestions
- `format_suggestions`: Format suggestions for error messages

`ConsoleLogger` ¶

Consistent console logger for user-facing messages.

Uses the standard Python logging module for output, ensuring consistent formatting with timestamps and module names across all Lacuna modules.

Parameters:

Name	Type	Description	Default
`verbose`	`bool`	If True, log messages. If False, silent mode (no output).	`False`
`width`	`int`	Width for section headers	`70`
`indent`	`str`	Indentation string for nested messages	`"  "`
`name`	`str`	Logger name for the Python logging module	`"lacuna.analysis"`

Examples:

Output below assumes a logging handler formatted as `%(asctime)s - %(name)s - %(levelname)s - %(message)s`.

>>> logger = ConsoleLogger(verbose=True)
>>> logger.section("PROCESSING DATA")
2026-01-15 10:00:00 - lacuna.analysis - INFO -
2026-01-15 10:00:00 - lacuna.analysis - INFO - ======================================================================
2026-01-15 10:00:00 - lacuna.analysis - INFO - PROCESSING DATA
2026-01-15 10:00:00 - lacuna.analysis - INFO - ======================================================================

>>> logger.info("Loading connectome...")
2026-01-15 10:00:00 - lacuna.analysis - INFO - Loading connectome...

>>> logger.success("Analysis complete", details={"subjects": 10, "time": 42.3})
2026-01-15 10:00:00 - lacuna.analysis - INFO - Analysis complete
2026-01-15 10:00:00 - lacuna.analysis - INFO -   subjects: 10
2026-01-15 10:00:00 - lacuna.analysis - INFO -   time: 42.30

Source code in src/lacuna/utils/logging.py

class ConsoleLogger:
    """
    Consistent console logger for user-facing messages.

    Uses the standard Python logging module for output, ensuring consistent
    formatting with timestamps and module names across all Lacuna modules.

    Parameters
    ----------
    verbose : bool, default=True
        If True, print messages. If False, silent mode (no output).
    width : int, default=70
        Width for section headers
    indent : str, default="  "
        Indentation string for nested messages
    name : str, default="lacuna"
        Logger name for the Python logging module

    Examples
    --------
    >>> logger = ConsoleLogger(verbose=True)
    >>> logger.section("PROCESSING DATA")
    2026-01-15 10:00:00 - lacuna - INFO - ============================================
    2026-01-15 10:00:00 - lacuna - INFO - PROCESSING DATA
    2026-01-15 10:00:00 - lacuna - INFO - ============================================

    >>> logger.info("Loading connectome...")
    2026-01-15 10:00:00 - lacuna - INFO - Loading connectome...

    >>> logger.success("Analysis complete", details={"subjects": 10, "time": 42.3})
    2026-01-15 10:00:00 - lacuna - INFO - Analysis complete
    2026-01-15 10:00:00 - lacuna - INFO -   subjects: 10
    2026-01-15 10:00:00 - lacuna - INFO -   time: 42.3
    """

    def __init__(
        self,
        verbose: bool = False,
        width: int = 70,
        indent: str = "  ",
        name: str = "lacuna.analysis",
    ):
        """Initialize console logger."""
        self.verbose = verbose
        self.width = width
        self.indent = indent
        self._logger = logging.getLogger(name)

    def _log(self, message: str, level: int = logging.INFO) -> None:
        """
        Log message if verbose mode is enabled.

        Parameters
        ----------
        message : str
            Message to log
        level : int
            Logging level (default: INFO)
        """
        if self.verbose:
            self._logger.log(level, message)

    def section(self, title: str) -> None:
        """
        Print a major section header.

        Parameters
        ----------
        title : str
            Section title

        Examples
        --------
        >>> logger.section("ANALYSIS PIPELINE")
        """
        if self.verbose:
            separator = "=" * self.width
            self._log("")
            self._log(separator)
            self._log(title)
            self._log(separator)

    def subsection(self, title: str) -> None:
        """
        Print a minor subsection header.

        Parameters
        ----------
        title : str
            Subsection title

        Examples
        --------
        >>> logger.subsection("Loading data")
        """
        if self.verbose:
            separator = "-" * self.width
            self._log("")
            self._log(separator)
            self._log(title)
            self._log(separator)

    def debug(self, message: str, indent_level: int = 0) -> None:
        """
        Print a debug message (only shown with -vv verbosity).

        Parameters
        ----------
        message : str
            Debug message
        indent_level : int, default=0
            Indentation level (0, 1, 2, ...)

        Examples
        --------
        >>> logger.debug("Computing correlation maps")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.DEBUG)

    def info(self, message: str, indent_level: int = 0) -> None:
        """
        Print an informational message.

        Parameters
        ----------
        message : str
            Information message
        indent_level : int, default=0
            Indentation level (0, 1, 2, ...)

        Examples
        --------
        >>> logger.info("Loading mask information...")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}")

    def success(
        self,
        message: str,
        details: dict | None = None,
        indent_level: int = 0,
    ) -> None:
        """
        Print a success message with optional details.

        Parameters
        ----------
        message : str
            Success message
        details : dict, optional
            Dictionary of key-value pairs to display
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}")

        if details and self.verbose:
            detail_indent = self.indent * (indent_level + 1)
            for key, value in details.items():
                # Format numbers nicely
                if isinstance(value, float):
                    formatted_value = f"{value:.2f}"
                elif isinstance(value, int) and value >= 1000:
                    formatted_value = f"{value:,}"
                else:
                    formatted_value = str(value)

                self._log(f"{detail_indent}{key}: {formatted_value}")

    def warning(self, message: str, indent_level: int = 0) -> None:
        """
        Print a warning message.

        Parameters
        ----------
        message : str
            Warning message
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.warning("Mask size smaller than expected")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.WARNING)

    def error(self, message: str, indent_level: int = 0) -> None:
        """
        Print an error message.

        Parameters
        ----------
        message : str
            Error message
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.error("Failed to load connectome")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.ERROR)

    def progress(
        self,
        message: str,
        current: int | None = None,
        total: int | None = None,
        percent: float | None = None,
        indent_level: int = 0,
    ) -> None:
        """
        Print a progress update.

        Parameters
        ----------
        message : str
            Progress message
        current : int, optional
            Current item number
        total : int, optional
            Total items
        percent : float, optional
            Completion percentage (0-100)
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.progress("Processing batch", current=3, total=10)
        >>> logger.progress("Loading data", percent=65.5)
        """
        indent = self.indent * indent_level
        progress_str = f"{indent}{message}"

        if current is not None and total is not None:
            progress_str += f" [{current}/{total}]"
        elif percent is not None:
            progress_str += f" [{percent:.1f}%]"

        self._log(progress_str)

    def result_summary(self, title: str, metrics: dict, indent_level: int = 0) -> None:
        """
        Print a formatted summary of results.

        Parameters
        ----------
        title : str
            Summary title
        metrics : dict
            Dictionary of metric name: value pairs
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.result_summary("Analysis Results", {
        ...     "Mean correlation": 0.4523,
        ...     "Std correlation": 0.1234,
        ...     "Range": "[-0.45, 0.89]"
        ... })
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{title}:")

        detail_indent = self.indent * (indent_level + 1)
        for key, value in metrics.items():
            if isinstance(value, float):
                formatted_value = f"{value:.4f}"
            elif isinstance(value, int) and value >= 1000:
                formatted_value = f"{value:,}"
            else:
                formatted_value = str(value)

            self._log(f"{detail_indent}{key}: {formatted_value}")

    def blank_line(self) -> None:
        """Print a blank line for spacing."""
        if self.verbose:
            self._log("")

`__init__(verbose=False, width=70, indent='  ', name='lacuna.analysis')` ¶

Initialize console logger.

Source code in src/lacuna/utils/logging.py

def __init__(
    self,
    verbose: bool = False,
    width: int = 70,
    indent: str = "  ",
    name: str = "lacuna.analysis",
) -> None:
    """
    Initialize console logger.

    Parameters
    ----------
    verbose : bool, default=False
        Stored as ``self.verbose``.
    width : int, default=70
        Stored as ``self.width``.
    indent : str, default="  "
        Stored as ``self.indent``.
    name : str, default="lacuna.analysis"
        Name handed to ``logging.getLogger``.
    """
    self.verbose = verbose
    self.width = width
    self.indent = indent
    # Look up (or create) the standard-library logger registered under `name`.
    self._logger = logging.getLogger(name)

`blank_line()` ¶

Print a blank line for spacing.

Source code in src/lacuna/utils/logging.py

def blank_line(self) -> None:
    """Log an empty message for spacing; does nothing unless ``self.verbose`` is true."""
    if not self.verbose:
        return
    self._log("")

`debug(message, indent_level=0)` ¶

Print a debug message (only shown with -vv verbosity).

Parameters:

Name	Type	Description	Default
`message`	`str`	Debug message	required
`indent_level`	`int`	Indentation level (0, 1, 2, ...)	`0`

Examples:

>>> logger.debug("Computing correlation maps")

Source code in src/lacuna/utils/logging.py

def debug(self, message: str, indent_level: int = 0) -> None:
    """
    Forward a message to ``self._log`` at DEBUG level.

    Originally documented as "only shown with -vv verbosity"; the mapping
    from verbosity flags to logging levels is configured outside this method.

    Parameters
    ----------
    message : str
        Debug message
    indent_level : int, default=0
        Number of times ``self.indent`` is prepended (0, 1, 2, ...)

    Examples
    --------
    >>> logger.debug("Computing correlation maps")
    """
    self._log(f"{self.indent * indent_level}{message}", level=logging.DEBUG)

`error(message, indent_level=0)` ¶

Print an error message.

Parameters:

Name	Type	Description	Default
`message`	`str`	Error message	required
`indent_level`	`int`	Indentation level	`0`

Examples:

>>> logger.error("Failed to load connectome")

Source code in src/lacuna/utils/logging.py

def error(self, message: str, indent_level: int = 0) -> None:
    """
    Forward a message to ``self._log`` at ERROR level.

    Parameters
    ----------
    message : str
        Error message
    indent_level : int, default=0
        Number of times ``self.indent`` is prepended

    Examples
    --------
    >>> logger.error("Failed to load connectome")
    """
    self._log(f"{self.indent * indent_level}{message}", level=logging.ERROR)

`info(message, indent_level=0)` ¶

Print an informational message.

Parameters:

Name	Type	Description	Default
`message`	`str`	Information message	required
`indent_level`	`int`	Indentation level (0, 1, 2, ...)	`0`

Examples:

>>> logger.info("Loading mask information...")

Source code in src/lacuna/utils/logging.py

def info(self, message: str, indent_level: int = 0) -> None:
    """
    Forward a message to ``self._log`` at its default level.

    Parameters
    ----------
    message : str
        Information message
    indent_level : int, default=0
        Number of times ``self.indent`` is prepended (0, 1, 2, ...)

    Examples
    --------
    >>> logger.info("Loading mask information...")
    """
    self._log(f"{self.indent * indent_level}{message}")

`progress(message, current=None, total=None, percent=None, indent_level=0)` ¶

Print a progress update.

Parameters:

Name	Type	Description	Default
`message`	`str`	Progress message	required
`current`	`int`	Current item number	`None`
`total`	`int`	Total items	`None`
`percent`	`float`	Completion percentage (0-100)	`None`
`indent_level`	`int`	Indentation level	`0`

Examples:

>>> logger.progress("Processing batch", current=3, total=10)
>>> logger.progress("Loading data", percent=65.5)

Source code in src/lacuna/utils/logging.py

def progress(
    self,
    message: str,
    current: int | None = None,
    total: int | None = None,
    percent: float | None = None,
    indent_level: int = 0,
) -> None:
    """
    Print a progress update.

    Parameters
    ----------
    message : str
        Progress message
    current : int, optional
        Current item number
    total : int, optional
        Total items
    percent : float, optional
        Completion percentage (0-100)
    indent_level : int, default=0
        Indentation level

    Examples
    --------
    >>> logger.progress("Processing batch", current=3, total=10)
    >>> logger.progress("Loading data", percent=65.5)
    """
    indent = self.indent * indent_level
    progress_str = f"{indent}{message}"

    if current is not None and total is not None:
        progress_str += f" [{current}/{total}]"
    elif percent is not None:
        progress_str += f" [{percent:.1f}%]"

    self._log(progress_str)

`result_summary(title, metrics, indent_level=0)` ¶

Print a formatted summary of results.

Parameters:

Name	Type	Description	Default
`title`	`str`	Summary title	required
`metrics`	`dict`	Dictionary of metric name: value pairs	required
`indent_level`	`int`	Indentation level	`0`

Examples:

>>> logger.result_summary("Analysis Results", {
...     "Mean correlation": 0.4523,
...     "Std correlation": 0.1234,
...     "Range": "[-0.45, 0.89]"
... })

Source code in src/lacuna/utils/logging.py

def result_summary(self, title: str, metrics: dict, indent_level: int = 0) -> None:
    """
    Log a titled summary of results through ``self._log``.

    The title is logged followed by a colon. Each metric is then logged
    as ``key: value`` on its own line, one indentation level deeper.

    Value formatting:

    - floats are shown with 4 decimals;
    - ints whose magnitude is at least 1000 get thousands separators
      (negative values included);
    - anything else is rendered with ``str``.

    Parameters
    ----------
    title : str
        Summary title
    metrics : dict
        Dictionary of metric name: value pairs
    indent_level : int, default=0
        Indentation level

    Examples
    --------
    >>> logger.result_summary("Analysis Results", {
    ...     "Mean correlation": 0.4523,
    ...     "Std correlation": 0.1234,
    ...     "Range": "[-0.45, 0.89]"
    ... })
    """
    indent = self.indent * indent_level
    self._log(f"{indent}{title}:")

    detail_indent = self.indent * (indent_level + 1)
    for key, value in metrics.items():
        if isinstance(value, float):
            formatted_value = f"{value:.4f}"
        elif isinstance(value, int) and abs(value) >= 1000:
            # abs() so that e.g. -12345 is grouped just like 12345.
            formatted_value = f"{value:,}"
        else:
            formatted_value = str(value)

        self._log(f"{detail_indent}{key}: {formatted_value}")

`section(title)` ¶

Print a major section header.

Parameters:

Name	Type	Description	Default
`title`	`str`	Section title	required

Examples:

>>> logger.section("ANALYSIS PIPELINE")

Source code in src/lacuna/utils/logging.py

def section(self, title: str) -> None:
    """
    Log a major section header.

    When ``self.verbose`` is true, four messages are logged in order: an
    empty line, a separator of ``self.width`` ``=`` characters, the
    title, and the separator again. Otherwise nothing happens.

    Parameters
    ----------
    title : str
        Section title

    Examples
    --------
    >>> logger.section("ANALYSIS PIPELINE")
    """
    if not self.verbose:
        return

    rule = "=" * self.width
    for line in ("", rule, title, rule):
        self._log(line)

`subsection(title)` ¶

Print a minor subsection header.

Parameters:

Name	Type	Description	Default
`title`	`str`	Subsection title	required

Examples:

>>> logger.subsection("Loading data")

Source code in src/lacuna/utils/logging.py

def subsection(self, title: str) -> None:
    """
    Log a minor subsection header.

    When ``self.verbose`` is true, four messages are logged in order: an
    empty line, a separator of ``self.width`` ``-`` characters, the
    title, and the separator again. Otherwise nothing happens.

    Parameters
    ----------
    title : str
        Subsection title

    Examples
    --------
    >>> logger.subsection("Loading data")
    """
    if not self.verbose:
        return

    rule = "-" * self.width
    for line in ("", rule, title, rule):
        self._log(line)

`success(message, details=None, indent_level=0)` ¶

Print a success message with optional details.

Parameters:

Name	Type	Description	Default
`message`	`str`	Success message	required
`details`	`dict`	Dictionary of key-value pairs to display	`None`
`indent_level`	`int`	Indentation level	`0`

Examples:

>>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})

Source code in src/lacuna/utils/logging.py

def success(
    self,
    message: str,
    details: dict | None = None,
    indent_level: int = 0,
) -> None:
    """
    Print a success message with optional details.

    Parameters
    ----------
    message : str
        Success message
    details : dict, optional
        Dictionary of key-value pairs to display
    indent_level : int, default=0
        Indentation level

    Examples
    --------
    >>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})
    """
    indent = self.indent * indent_level
    self._log(f"{indent}{message}")

    if details and self.verbose:
        detail_indent = self.indent * (indent_level + 1)
        for key, value in details.items():
            # Format numbers nicely
            if isinstance(value, float):
                formatted_value = f"{value:.2f}"
            elif isinstance(value, int) and value >= 1000:
                formatted_value = f"{value:,}"
            else:
                formatted_value = str(value)

            self._log(f"{detail_indent}{key}: {formatted_value}")

`warning(message, indent_level=0)` ¶

Print a warning message.

Parameters:

Name	Type	Description	Default
`message`	`str`	Warning message	required
`indent_level`	`int`	Indentation level	`0`

Examples:

>>> logger.warning("Mask size smaller than expected")

Source code in src/lacuna/utils/logging.py

def warning(self, message: str, indent_level: int = 0) -> None:
    """
    Forward a message to ``self._log`` at WARNING level.

    Parameters
    ----------
    message : str
        Warning message
    indent_level : int, default=0
        Number of times ``self.indent`` is prepended

    Examples
    --------
    >>> logger.warning("Mask size smaller than expected")
    """
    self._log(f"{self.indent * indent_level}{message}", level=logging.WARNING)

`create_connectome_batches(gsp_dir, mask_path, output_dir, subjects_per_batch=50, pattern='sub-*/func/*bld001_rest_*_finalmask.nii.gz', verbose=False)` ¶

Create HDF5 batch files from GSP1000 functional data.

Scans a directory of functional NIfTI files, extracts time-series from within a brain mask, and saves the data into multiple smaller HDF5 batch files optimized for memory-efficient lesion network mapping.

Parameters:

Name	Type	Description	Default
`gsp_dir`	`str or Path`	Directory containing GSP1000 subject functional data. Expected structure: `sub-*/func/*bld001_rest_*_finalmask.nii.gz`	required
`mask_path`	`str or Path`	Path to brain mask NIfTI file (e.g., MNI152_T1_2mm_Brain_Mask.nii.gz). Defines which voxels to extract.	required
`output_dir`	`str or Path`	Directory where HDF5 batch files will be saved.	required
`subjects_per_batch`	`int`	Number of subjects to include in each batch file. Larger batches = fewer files but more memory per batch.	`50`
`pattern`	`str`	Glob pattern to find functional files within gsp_dir. Default matches standard GSP1000 structure.	`'sub-*/func/*bld001_rest_*_finalmask.nii.gz'`
`verbose`	`bool`	Print progress information.	`False`

Returns:

Type	Description
`list of Path`	Paths to created HDF5 batch files, sorted by name.

Notes

Each HDF5 batch file contains:

- 'timeseries': (n_subjects, n_timepoints, n_voxels) float32 array
- 'mask_indices': (3, n_voxels) array of mask coordinates
- 'mask_affine': (4, 4) affine transformation matrix
- Attributes: n_subjects, n_timepoints, n_voxels, mask_shape

The batch files are designed for sequential loading during analysis, minimizing memory footprint while maintaining processing speed.

Examples:

>>> from lacuna.utils.gsp1000 import create_connectome_batches
>>> batch_files = create_connectome_batches(
...     gsp_dir="/data/GSP1000",
...     mask_path="/data/MNI152_T1_2mm_Brain_Mask.nii.gz",
...     output_dir="/data/connectomes/gsp1000_batches",
...     subjects_per_batch=100
... )
>>> print(f"Created {len(batch_files)} batch files")

Source code in src/lacuna/utils/gsp1000.py

def create_connectome_batches(
    gsp_dir: str | Path,
    mask_path: str | Path,
    output_dir: str | Path,
    subjects_per_batch: int = 50,
    pattern: str = "sub-*/func/*bld001_rest_*_finalmask.nii.gz",
    verbose: bool = False,
) -> list[Path]:
    """Create HDF5 batch files from GSP1000 functional data.

    Scans a directory of functional NIfTI files, extracts time-series from
    within a brain mask, and saves the data into multiple smaller HDF5 batch
    files optimized for memory-efficient lesion network mapping.

    Parameters
    ----------
    gsp_dir : str or Path
        Directory containing GSP1000 subject functional data.
        Expected structure: sub-*/func/*bld001_rest_*_finalmask.nii.gz
    mask_path : str or Path
        Path to brain mask NIfTI file (e.g., MNI152_T1_2mm_Brain_Mask.nii.gz).
        Defines which voxels to extract.
    output_dir : str or Path
        Directory where HDF5 batch files will be saved.
    subjects_per_batch : int, default=50
        Number of subjects to include in each batch file.
        Larger batches = fewer files but more memory per batch.
    pattern : str, optional
        Glob pattern to find functional files within gsp_dir.
        Default matches standard GSP1000 structure.
    verbose : bool, default=True
        Print progress information.

    Returns
    -------
    list of Path
        Paths to created HDF5 batch files, sorted by name.

    Notes
    -----
    Each HDF5 batch file contains:
    - 'timeseries': (n_subjects, n_timepoints, n_voxels) float32 array
    - 'mask_indices': (3, n_voxels) array of mask coordinates
    - 'mask_affine': (4, 4) affine transformation matrix
    - Attributes: n_subjects, n_timepoints, n_voxels, mask_shape

    The batch files are designed for sequential loading during analysis,
    minimizing memory footprint while maintaining processing speed.

    Examples
    --------
    >>> from lacuna.utils.gsp1000 import create_connectome_batches
    >>> batch_files = create_connectome_batches(
    ...     gsp_dir="/data/GSP1000",
    ...     mask_path="/data/MNI152_T1_2mm_Brain_Mask.nii.gz",
    ...     output_dir="/data/connectomes/gsp1000_batches",
    ...     subjects_per_batch=100
    ... )
    >>> print(f"Created {len(batch_files)} batch files")
    """
    gsp_dir = Path(gsp_dir)
    mask_path = Path(mask_path)
    output_dir = Path(output_dir)

    if verbose:
        print("🚀 Creating connectome batch files from GSP1000 data...")

    # 1. Find all functional NIfTI files
    search_pattern = str(gsp_dir / pattern)
    all_subject_files = sorted(glob.glob(search_pattern))

    if not all_subject_files:
        raise FileNotFoundError(
            f"No NIfTI files found matching pattern: {search_pattern}\n"
            f"Expected structure: {gsp_dir}/sub-*/func/*bld001_rest_*_finalmask.nii.gz"
        )

    n_total_subjects = len(all_subject_files)
    if verbose:
        print(f"✓ Found {n_total_subjects} subject files")

    # 2. Load brain mask and extract metadata
    if verbose:
        print(f"Loading brain mask: {mask_path}")
    mask_img = nib.load(mask_path)
    mask_data = mask_img.get_fdata().astype(bool)
    mask_affine = mask_img.affine
    in_mask_indices = np.where(mask_data)
    n_voxels = len(in_mask_indices[0])

    # Get number of timepoints from first subject
    first_img = nib.load(all_subject_files[0])
    n_timepoints = first_img.shape[3]

    if verbose:
        print(f"✓ Mask contains {n_voxels:,} in-brain voxels")
        print(f"✓ Detected {n_timepoints} timepoints per subject")

    # 3. Split subjects into batches
    subject_batches = [
        all_subject_files[i : i + subjects_per_batch]
        for i in range(0, n_total_subjects, subjects_per_batch)
    ]
    n_batches = len(subject_batches)

    if verbose:
        print(f"✓ Will create {n_batches} batch files ({subjects_per_batch} subjects/batch)")

    output_dir.mkdir(parents=True, exist_ok=True)
    created_files = []

    # 4. Process each batch
    progress_iter = tqdm(subject_batches, desc="Creating batches") if verbose else subject_batches

    for batch_idx, batch_files in enumerate(progress_iter):
        batch_filename = output_dir / f"connectome_batch_{batch_idx:03d}.h5"
        n_subjects_in_batch = len(batch_files)

        with h5py.File(batch_filename, "w") as hf:
            # Create dataset for this batch
            timeseries_dset = hf.create_dataset(
                "timeseries",
                shape=(n_subjects_in_batch, n_timepoints, n_voxels),
                dtype=np.float32,
                chunks=(1, n_timepoints, n_voxels),
                compression="gzip",
                compression_opts=1,  # Fast compression
            )

            # Store metadata (makes each batch self-contained)
            hf.create_dataset("mask_indices", data=np.vstack(in_mask_indices))
            hf.create_dataset("mask_affine", data=mask_affine)
            hf.attrs["n_subjects"] = n_subjects_in_batch
            hf.attrs["n_timepoints"] = n_timepoints
            hf.attrs["n_voxels"] = n_voxels
            hf.attrs["mask_shape"] = mask_data.shape

            # Process subjects in current batch
            for subj_idx, func_path in enumerate(batch_files):
                func_img = nib.load(func_path)
                func_data = func_img.get_fdata()

                # Extract in-mask voxels and transpose to (timepoints, voxels)
                subject_timeseries = func_data[in_mask_indices].T

                # Save to HDF5
                timeseries_dset[subj_idx, :, :] = subject_timeseries

        created_files.append(batch_filename)

    if verbose:
        print(f"\n✅ Created {len(created_files)} batch files in {output_dir}")
        total_size_mb = sum(f.stat().st_size for f in created_files) / (1024**2)
        print(f"✓ Total size: {total_size_mb:.1f} MB")

    return created_files

`format_suggestions(suggestions)` ¶

Format a list of suggestions for inclusion in an error message.

Parameters:

Name	Type	Description	Default
`suggestions`	`list[str]`	List of suggested strings.	required

Returns:

Type	Description
`str`	Formatted string for error message, or empty string if no suggestions.

Examples:

>>> format_suggestions(["rmap"])
"Did you mean 'rmap'?"

>>> format_suggestions(["rmap", "zscoremap"])
"Did you mean one of: 'rmap', 'zscoremap'?"

>>> format_suggestions([])
''

Source code in src/lacuna/utils/suggestions.py

def format_suggestions(suggestions: list[str]) -> str:
    """
    Turn a list of suggestions into a "Did you mean ...?" phrase.

    Parameters
    ----------
    suggestions : list[str]
        Suggested strings, in the order they should be shown.

    Returns
    -------
    str
        ``""`` when there are no suggestions, a single-quoted suggestion
        when there is exactly one, and a comma-separated list of quoted
        suggestions otherwise.

    Examples
    --------
    >>> format_suggestions(["rmap"])
    "Did you mean 'rmap'?"

    >>> format_suggestions(["rmap", "zscoremap"])
    "Did you mean one of: 'rmap', 'zscoremap'?"

    >>> format_suggestions([])
    ''
    """
    if not suggestions:
        return ""

    if len(suggestions) > 1:
        options = ", ".join(f"'{item}'" for item in suggestions)
        return f"Did you mean one of: {options}?"

    return f"Did you mean '{suggestions[0]}'?"

`log_error(message, verbose=False)` ¶

Print an error message.

Source code in src/lacuna/utils/logging.py

def log_error(message: str, verbose: bool = False) -> None:
    """Send *message* to ``ConsoleLogger.error`` on a fresh logger built with ``verbose``."""
    ConsoleLogger(verbose=verbose).error(message)

`log_info(message, verbose=False)` ¶

Print an info message.

Source code in src/lacuna/utils/logging.py

def log_info(message: str, verbose: bool = False) -> None:
    """Send *message* to ``ConsoleLogger.info`` on a fresh logger built with ``verbose``."""
    ConsoleLogger(verbose=verbose).info(message)

`log_progress(message, current=None, total=None, verbose=False)` ¶

Print a progress message.

Source code in src/lacuna/utils/logging.py

def log_progress(
    message: str,
    current: int | None = None,
    total: int | None = None,
    verbose: bool = False,
) -> None:
    """Send a progress update to ``ConsoleLogger.progress`` on a fresh logger.

    ``current`` and ``total`` are passed through unchanged; the logger is
    built with the given ``verbose`` flag.
    """
    ConsoleLogger(verbose=verbose).progress(message, current=current, total=total)

`log_section(title, width=70, verbose=False)` ¶

Print a section header.

Source code in src/lacuna/utils/logging.py

def log_section(title: str, width: int = 70, verbose: bool = False) -> None:
    """Send *title* to ``ConsoleLogger.section`` on a fresh logger built with ``verbose`` and ``width``."""
    ConsoleLogger(verbose=verbose, width=width).section(title)

`log_success(message, details=None, verbose=False)` ¶

Print a success message.

Source code in src/lacuna/utils/logging.py

def log_success(message: str, details: dict | None = None, verbose: bool = False) -> None:
    """Send *message* and optional *details* to ``ConsoleLogger.success`` on a fresh logger built with ``verbose``."""
    ConsoleLogger(verbose=verbose).success(message, details=details)

`log_warning(message, verbose=False)` ¶

Print a warning message.

Source code in src/lacuna/utils/logging.py

def log_warning(message: str, verbose: bool = False) -> None:
    """Send *message* to ``ConsoleLogger.warning`` on a fresh logger built with ``verbose``."""
    ConsoleLogger(verbose=verbose).warning(message)

`suggest_similar(query, candidates, max_suggestions=3, min_similarity=0.4)` ¶

Find candidates most similar to the query string.

Uses difflib.SequenceMatcher for similarity scoring. Results are sorted by similarity (most similar first) and filtered by minimum threshold.

Parameters:

Name	Type	Description	Default
`query`	`str`	The string to find matches for (e.g., user's typo).	required
`candidates`	`list[str]`	Available options to suggest from.	required
`max_suggestions`	`int`	Maximum number of suggestions to return.	`3`
`min_similarity`	`float`	Minimum similarity ratio (0.0 to 1.0) to include a suggestion. Higher values require closer matches.	`0.4`

Returns:

Type	Description
`list[str]`	Up to `max_suggestions` similar candidates, sorted by similarity. Empty list if no candidates meet the minimum similarity threshold.

Examples:

>>> available = ["rmap", "zscoremap", "damagescore"]
>>> suggest_similar("rmp", available)
['rmap', 'zscoremap']

>>> suggest_similar("rmp", available, min_similarity=0.6)
['rmap']

>>> suggest_similar("score", available)
['zscoremap', 'damagescore']

>>> suggest_similar("xyz", available, min_similarity=0.5)
[]  # No close matches

>>> # Case-insensitive matching
>>> suggest_similar("Rmap", available, max_suggestions=1)
['rmap']

Source code in src/lacuna/utils/suggestions.py

def suggest_similar(
    query: str,
    candidates: list[str],
    max_suggestions: int = 3,
    min_similarity: float = 0.4,
) -> list[str]:
    """
    Find candidates most similar to the query string.

    Uses difflib.SequenceMatcher for similarity scoring. Results are sorted
    by similarity (most similar first, ties broken alphabetically) and
    filtered by minimum threshold. Scoring is case-insensitive; the
    returned strings keep their original casing.

    Parameters
    ----------
    query : str
        The string to find matches for (e.g., user's typo).
    candidates : list[str]
        Available options to suggest from.
    max_suggestions : int, default=3
        Maximum number of suggestions to return. Zero or a negative value
        yields an empty list.
    min_similarity : float, default=0.4
        Minimum similarity ratio (0.0 to 1.0) to include a suggestion.
        Higher values require closer matches.

    Returns
    -------
    list[str]
        Up to `max_suggestions` similar candidates, sorted by similarity.
        Empty list if no candidates meet the minimum similarity threshold.

    Examples
    --------
    >>> available = ["rmap", "zscoremap", "damagescore"]
    >>> suggest_similar("rmp", available, max_suggestions=1)
    ['rmap']

    >>> suggest_similar("score", available)
    ['zscoremap', 'damagescore']

    >>> suggest_similar("xyz", available, min_similarity=0.5)
    []

    >>> # Case-insensitive matching
    >>> suggest_similar("Rmap", available, min_similarity=0.8)
    ['rmap']
    """
    # A non-positive limit must mean "no suggestions"; without this guard a
    # negative value would slice from the end (scored[:-1]) and leak results.
    if not candidates or max_suggestions <= 0:
        return []

    query_lower = query.lower()
    scored = []

    for candidate in candidates:
        # Compare lowercased forms so casing never affects the score.
        ratio = SequenceMatcher(None, query_lower, candidate.lower()).ratio()
        if ratio >= min_similarity:
            scored.append((ratio, candidate))

    # Highest ratio first; alphabetical order keeps ties deterministic.
    scored.sort(key=lambda pair: (-pair[0], pair[1]))

    return [candidate for _, candidate in scored[:max_suggestions]]

`validate_connectome_batches(batch_dir, verbose=False)` ¶

Validate integrity of HDF5 connectome batch files.

Parameters:

Name	Type	Description	Default
`batch_dir`	`str or Path`	Directory containing HDF5 batch files.	required
`verbose`	`bool`	Print validation results.	`False`

Returns:

Type	Description
`dict`	Validation summary with keys: n_batches, total_subjects, n_timepoints, n_voxels, mask_shape, consistent, errors

Source code in src/lacuna/utils/gsp1000.py

def validate_connectome_batches(batch_dir: str | Path, verbose: bool = False) -> dict:
    """Validate integrity of HDF5 connectome batch files.

    Parameters
    ----------
    batch_dir : str or Path
        Directory containing HDF5 batch files.
    verbose : bool, default=True
        Print validation results.

    Returns
    -------
    dict
        Validation summary with keys: n_batches, total_subjects, n_timepoints,
        n_voxels, mask_shape, consistent, errors
    """
    batch_dir = Path(batch_dir)
    batch_files = sorted(batch_dir.glob("*.h5"))

    if not batch_files:
        raise FileNotFoundError(f"No HDF5 files found in {batch_dir}")

    errors = []
    total_subjects = 0
    reference_metadata = None

    if verbose:
        print(f"Validating {len(batch_files)} batch files...")

    for batch_file in batch_files:
        try:
            with h5py.File(batch_file, "r") as hf:
                # Check required datasets
                required = ["timeseries", "mask_indices", "mask_affine"]
                for key in required:
                    if key not in hf:
                        errors.append(f"{batch_file.name}: Missing dataset '{key}'")

                # Extract metadata
                n_subjects = hf.attrs["n_subjects"]
                n_timepoints = hf.attrs["n_timepoints"]
                n_voxels = hf.attrs["n_voxels"]
                mask_shape = tuple(hf.attrs["mask_shape"])

                total_subjects += n_subjects

                # Check consistency with first batch
                if reference_metadata is None:
                    reference_metadata = {
                        "n_timepoints": n_timepoints,
                        "n_voxels": n_voxels,
                        "mask_shape": mask_shape,
                    }
                else:
                    if n_timepoints != reference_metadata["n_timepoints"]:
                        errors.append(
                            f"{batch_file.name}: Inconsistent n_timepoints "
                            f"({n_timepoints} vs {reference_metadata['n_timepoints']})"
                        )
                    if n_voxels != reference_metadata["n_voxels"]:
                        errors.append(
                            f"{batch_file.name}: Inconsistent n_voxels "
                            f"({n_voxels} vs {reference_metadata['n_voxels']})"
                        )

        except Exception as e:
            errors.append(f"{batch_file.name}: Error reading file - {e}")

    summary = {
        "n_batches": len(batch_files),
        "total_subjects": total_subjects,
        "consistent": len(errors) == 0,
        "errors": errors,
    }
    summary.update(reference_metadata or {})

    if verbose:
        if summary["consistent"]:
            print("✅ All batches valid!")
            print(f"  - {summary['n_batches']} batches")
            print(f"  - {summary['total_subjects']} total subjects")
            print(f"  - {summary['n_timepoints']} timepoints")
            print(f"  - {summary['n_voxels']:,} voxels")
        else:
            print(f"❌ Found {len(errors)} errors:")
            for error in errors:
                print(f"  - {error}")

    return summary

utils

lacuna.utils ¶

ConsoleLogger ¶

__init__(verbose=False, width=70, indent=' ', name='lacuna.analysis') ¶

blank_line() ¶

debug(message, indent_level=0) ¶

error(message, indent_level=0) ¶

info(message, indent_level=0) ¶

progress(message, current=None, total=None, percent=None, indent_level=0) ¶

result_summary(title, metrics, indent_level=0) ¶

section(title) ¶

subsection(title) ¶

success(message, details=None, indent_level=0) ¶

warning(message, indent_level=0) ¶

create_connectome_batches(gsp_dir, mask_path, output_dir, subjects_per_batch=50, pattern='sub-*/func/*bld001_rest_*_finalmask.nii.gz', verbose=False) ¶

format_suggestions(suggestions) ¶

log_error(message, verbose=False) ¶

log_info(message, verbose=False) ¶

log_progress(message, current=None, total=None, verbose=False) ¶

log_section(title, width=70, verbose=False) ¶

log_success(message, details=None, verbose=False) ¶

log_warning(message, verbose=False) ¶

suggest_similar(query, candidates, max_suggestions=3, min_similarity=0.4) ¶

validate_connectome_batches(batch_dir, verbose=False) ¶

`lacuna.utils` ¶

`ConsoleLogger` ¶

`__init__(verbose=False, width=70, indent=' ', name='lacuna.analysis')` ¶

`blank_line()` ¶

`debug(message, indent_level=0)` ¶

`error(message, indent_level=0)` ¶

`info(message, indent_level=0)` ¶

`progress(message, current=None, total=None, percent=None, indent_level=0)` ¶

`result_summary(title, metrics, indent_level=0)` ¶

`section(title)` ¶

`subsection(title)` ¶

`success(message, details=None, indent_level=0)` ¶

`warning(message, indent_level=0)` ¶

`create_connectome_batches(gsp_dir, mask_path, output_dir, subjects_per_batch=50, pattern='sub-*/func/*bld001_rest_*_finalmask.nii.gz', verbose=False)` ¶

`format_suggestions(suggestions)` ¶

`log_error(message, verbose=False)` ¶

`log_info(message, verbose=False)` ¶

`log_progress(message, current=None, total=None, verbose=False)` ¶

`log_section(title, width=70, verbose=False)` ¶

`log_success(message, details=None, verbose=False)` ¶

`log_warning(message, verbose=False)` ¶

`suggest_similar(query, candidates, max_suggestions=3, min_similarity=0.4)` ¶

`validate_connectome_batches(batch_dir, verbose=False)` ¶