Skip to content

utils

lacuna.utils

Utility functions for data preprocessing and connectome preparation.

This module provides tools for working with neuroimaging datasets and preparing data for lesion network mapping analyses.

Key Components

GSP1000 Utilities: - create_connectome_batches: Convert GSP1000 data to optimized HDF5 batches - validate_connectome_batches: Verify integrity of batch files

Logging Utilities: - ConsoleLogger: Consistent console logger for user-facing messages - log_section, log_info, log_success, log_warning, log_error, log_progress: Convenience functions

Suggestion Utilities: - suggest_similar: Find similar strings for error message suggestions - format_suggestions: Format suggestions for error messages

ConsoleLogger

Consistent console logger for user-facing messages.

Uses the standard Python logging module for output, ensuring consistent formatting with timestamps and module names across all Lacuna modules.

Parameters:

Name Type Description Default
verbose bool

If True, print messages. If False, silent mode (no output).

False
width int

Width for section headers

70
indent str

Indentation string for nested messages

" "
name str

Logger name for the Python logging module

"lacuna.analysis"

Examples:

>>> logger = ConsoleLogger(verbose=True)
>>> logger.section("PROCESSING DATA")
2026-01-15 10:00:00 - lacuna - INFO - ============================================
2026-01-15 10:00:00 - lacuna - INFO - PROCESSING DATA
2026-01-15 10:00:00 - lacuna - INFO - ============================================
>>> logger.info("Loading connectome...")
2026-01-15 10:00:00 - lacuna - INFO - Loading connectome...
>>> logger.success("Analysis complete", details={"subjects": 10, "time": 42.3})
2026-01-15 10:00:00 - lacuna - INFO - Analysis complete
2026-01-15 10:00:00 - lacuna - INFO -   subjects: 10
2026-01-15 10:00:00 - lacuna - INFO -   time: 42.30
Source code in src/lacuna/utils/logging.py
class ConsoleLogger:
    """
    Consistent console logger for user-facing messages.

    Uses the standard Python logging module for output, ensuring consistent
    formatting with timestamps and module names across all Lacuna modules.

    Parameters
    ----------
    verbose : bool, default=True
        If True, print messages. If False, silent mode (no output).
    width : int, default=70
        Width for section headers
    indent : str, default="  "
        Indentation string for nested messages
    name : str, default="lacuna"
        Logger name for the Python logging module

    Examples
    --------
    >>> logger = ConsoleLogger(verbose=True)
    >>> logger.section("PROCESSING DATA")
    2026-01-15 10:00:00 - lacuna - INFO - ============================================
    2026-01-15 10:00:00 - lacuna - INFO - PROCESSING DATA
    2026-01-15 10:00:00 - lacuna - INFO - ============================================

    >>> logger.info("Loading connectome...")
    2026-01-15 10:00:00 - lacuna - INFO - Loading connectome...

    >>> logger.success("Analysis complete", details={"subjects": 10, "time": 42.3})
    2026-01-15 10:00:00 - lacuna - INFO - Analysis complete
    2026-01-15 10:00:00 - lacuna - INFO -   subjects: 10
    2026-01-15 10:00:00 - lacuna - INFO -   time: 42.3
    """

    def __init__(
        self,
        verbose: bool = False,
        width: int = 70,
        indent: str = "  ",
        name: str = "lacuna.analysis",
    ):
        """Initialize console logger."""
        self.verbose = verbose
        self.width = width
        self.indent = indent
        self._logger = logging.getLogger(name)

    def _log(self, message: str, level: int = logging.INFO) -> None:
        """
        Log message if verbose mode is enabled.

        Parameters
        ----------
        message : str
            Message to log
        level : int
            Logging level (default: INFO)
        """
        if self.verbose:
            self._logger.log(level, message)

    def section(self, title: str) -> None:
        """
        Print a major section header.

        Parameters
        ----------
        title : str
            Section title

        Examples
        --------
        >>> logger.section("ANALYSIS PIPELINE")
        """
        if self.verbose:
            separator = "=" * self.width
            self._log("")
            self._log(separator)
            self._log(title)
            self._log(separator)

    def subsection(self, title: str) -> None:
        """
        Print a minor subsection header.

        Parameters
        ----------
        title : str
            Subsection title

        Examples
        --------
        >>> logger.subsection("Loading data")
        """
        if self.verbose:
            separator = "-" * self.width
            self._log("")
            self._log(separator)
            self._log(title)
            self._log(separator)

    def debug(self, message: str, indent_level: int = 0) -> None:
        """
        Print a debug message (only shown with -vv verbosity).

        Parameters
        ----------
        message : str
            Debug message
        indent_level : int, default=0
            Indentation level (0, 1, 2, ...)

        Examples
        --------
        >>> logger.debug("Computing correlation maps")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.DEBUG)

    def info(self, message: str, indent_level: int = 0) -> None:
        """
        Print an informational message.

        Parameters
        ----------
        message : str
            Information message
        indent_level : int, default=0
            Indentation level (0, 1, 2, ...)

        Examples
        --------
        >>> logger.info("Loading mask information...")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}")

    def success(
        self,
        message: str,
        details: dict | None = None,
        indent_level: int = 0,
    ) -> None:
        """
        Print a success message with optional details.

        Parameters
        ----------
        message : str
            Success message
        details : dict, optional
            Dictionary of key-value pairs to display
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}")

        if details and self.verbose:
            detail_indent = self.indent * (indent_level + 1)
            for key, value in details.items():
                # Format numbers nicely
                if isinstance(value, float):
                    formatted_value = f"{value:.2f}"
                elif isinstance(value, int) and value >= 1000:
                    formatted_value = f"{value:,}"
                else:
                    formatted_value = str(value)

                self._log(f"{detail_indent}{key}: {formatted_value}")

    def warning(self, message: str, indent_level: int = 0) -> None:
        """
        Print a warning message.

        Parameters
        ----------
        message : str
            Warning message
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.warning("Mask size smaller than expected")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.WARNING)

    def error(self, message: str, indent_level: int = 0) -> None:
        """
        Print an error message.

        Parameters
        ----------
        message : str
            Error message
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.error("Failed to load connectome")
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{message}", level=logging.ERROR)

    def progress(
        self,
        message: str,
        current: int | None = None,
        total: int | None = None,
        percent: float | None = None,
        indent_level: int = 0,
    ) -> None:
        """
        Print a progress update.

        Parameters
        ----------
        message : str
            Progress message
        current : int, optional
            Current item number
        total : int, optional
            Total items
        percent : float, optional
            Completion percentage (0-100)
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.progress("Processing batch", current=3, total=10)
        >>> logger.progress("Loading data", percent=65.5)
        """
        indent = self.indent * indent_level
        progress_str = f"{indent}{message}"

        if current is not None and total is not None:
            progress_str += f" [{current}/{total}]"
        elif percent is not None:
            progress_str += f" [{percent:.1f}%]"

        self._log(progress_str)

    def result_summary(self, title: str, metrics: dict, indent_level: int = 0) -> None:
        """
        Print a formatted summary of results.

        Parameters
        ----------
        title : str
            Summary title
        metrics : dict
            Dictionary of metric name: value pairs
        indent_level : int, default=0
            Indentation level

        Examples
        --------
        >>> logger.result_summary("Analysis Results", {
        ...     "Mean correlation": 0.4523,
        ...     "Std correlation": 0.1234,
        ...     "Range": "[-0.45, 0.89]"
        ... })
        """
        indent = self.indent * indent_level
        self._log(f"{indent}{title}:")

        detail_indent = self.indent * (indent_level + 1)
        for key, value in metrics.items():
            if isinstance(value, float):
                formatted_value = f"{value:.4f}"
            elif isinstance(value, int) and value >= 1000:
                formatted_value = f"{value:,}"
            else:
                formatted_value = str(value)

            self._log(f"{detail_indent}{key}: {formatted_value}")

    def blank_line(self) -> None:
        """Print a blank line for spacing."""
        if self.verbose:
            self._log("")

__init__(verbose=False, width=70, indent=' ', name='lacuna.analysis')

Initialize console logger.

Source code in src/lacuna/utils/logging.py
def __init__(
    self,
    verbose: bool = False,
    width: int = 70,
    indent: str = "  ",
    name: str = "lacuna.analysis",
):
    """
    Store the display settings and bind the underlying logger.

    Parameters
    ----------
    verbose : bool, default=False
        Whether messages are emitted at all.
    width : int, default=70
        Width used for section header rules.
    indent : str, default="  "
        String repeated once per indentation level.
    name : str, default="lacuna.analysis"
        Name passed to ``logging.getLogger``.
    """
    self.verbose, self.width, self.indent = verbose, width, indent
    self._logger = logging.getLogger(name)

blank_line()

Print a blank line for spacing.

Source code in src/lacuna/utils/logging.py
def blank_line(self) -> None:
    """Emit an empty message for vertical spacing; does nothing in silent mode."""
    if not self.verbose:
        return
    self._log("")

debug(message, indent_level=0)

Print a debug message (only shown with -vv verbosity).

Parameters:

Name Type Description Default
message str

Debug message

required
indent_level int

Indentation level (0, 1, 2, ...)

0

Examples:

>>> logger.debug("Computing correlation maps")
Source code in src/lacuna/utils/logging.py
def debug(self, message: str, indent_level: int = 0) -> None:
    """
    Log ``message`` at DEBUG level.

    Parameters
    ----------
    message : str
        Debug message
    indent_level : int, default=0
        Number of times the indentation string is prepended (0, 1, 2, ...)

    Examples
    --------
    >>> logger.debug("Computing correlation maps")
    """
    padding = self.indent * indent_level
    self._log("{}{}".format(padding, message), level=logging.DEBUG)

error(message, indent_level=0)

Print an error message.

Parameters:

Name Type Description Default
message str

Error message

required
indent_level int

Indentation level

0

Examples:

>>> logger.error("Failed to load connectome")
Source code in src/lacuna/utils/logging.py
def error(self, message: str, indent_level: int = 0) -> None:
    """
    Log ``message`` at ERROR level.

    Parameters
    ----------
    message : str
        Error message
    indent_level : int, default=0
        Number of times the indentation string is prepended

    Examples
    --------
    >>> logger.error("Failed to load connectome")
    """
    padding = self.indent * indent_level
    self._log("{}{}".format(padding, message), level=logging.ERROR)

info(message, indent_level=0)

Print an informational message.

Parameters:

Name Type Description Default
message str

Information message

required
indent_level int

Indentation level (0, 1, 2, ...)

0

Examples:

>>> logger.info("Loading mask information...")
Source code in src/lacuna/utils/logging.py
def info(self, message: str, indent_level: int = 0) -> None:
    """
    Log ``message`` at the default (INFO) level.

    Parameters
    ----------
    message : str
        Information message
    indent_level : int, default=0
        Number of times the indentation string is prepended (0, 1, 2, ...)

    Examples
    --------
    >>> logger.info("Loading mask information...")
    """
    padding = self.indent * indent_level
    self._log("{}{}".format(padding, message))

progress(message, current=None, total=None, percent=None, indent_level=0)

Print a progress update.

Parameters:

Name Type Description Default
message str

Progress message

required
current int

Current item number

None
total int

Total items

None
percent float

Completion percentage (0-100)

None
indent_level int

Indentation level

0

Examples:

>>> logger.progress("Processing batch", current=3, total=10)
>>> logger.progress("Loading data", percent=65.5)
Source code in src/lacuna/utils/logging.py
def progress(
    self,
    message: str,
    current: int | None = None,
    total: int | None = None,
    percent: float | None = None,
    indent_level: int = 0,
) -> None:
    """
    Print a progress update.

    Parameters
    ----------
    message : str
        Progress message
    current : int, optional
        Current item number
    total : int, optional
        Total items
    percent : float, optional
        Completion percentage (0-100)
    indent_level : int, default=0
        Indentation level

    Examples
    --------
    >>> logger.progress("Processing batch", current=3, total=10)
    >>> logger.progress("Loading data", percent=65.5)
    """
    indent = self.indent * indent_level
    progress_str = f"{indent}{message}"

    if current is not None and total is not None:
        progress_str += f" [{current}/{total}]"
    elif percent is not None:
        progress_str += f" [{percent:.1f}%]"

    self._log(progress_str)

result_summary(title, metrics, indent_level=0)

Print a formatted summary of results.

Parameters:

Name Type Description Default
title str

Summary title

required
metrics dict

Dictionary of metric name: value pairs

required
indent_level int

Indentation level

0

Examples:

>>> logger.result_summary("Analysis Results", {
...     "Mean correlation": 0.4523,
...     "Std correlation": 0.1234,
...     "Range": "[-0.45, 0.89]"
... })
Source code in src/lacuna/utils/logging.py
def result_summary(self, title: str, metrics: dict, indent_level: int = 0) -> None:
    """
    Log a titled list of metrics, one per line.

    The title is logged as ``title:``; each metric follows one indentation
    level deeper. Floats are shown with four decimals and integers of 1000
    or more with thousands separators; everything else uses ``str``.

    Parameters
    ----------
    title : str
        Summary title
    metrics : dict
        Dictionary of metric name: value pairs
    indent_level : int, default=0
        Indentation level of the title line

    Examples
    --------
    >>> logger.result_summary("Analysis Results", {
    ...     "Mean correlation": 0.4523,
    ...     "Std correlation": 0.1234,
    ...     "Range": "[-0.45, 0.89]"
    ... })
    """

    def render(val) -> str:
        # Floats first: they get fixed precision instead of a separator.
        if isinstance(val, float):
            return format(val, ".4f")
        if isinstance(val, int) and val >= 1000:
            return format(val, ",")
        return str(val)

    title_pad = self.indent * indent_level
    entry_pad = self.indent * (indent_level + 1)

    self._log(f"{title_pad}{title}:")
    for label, val in metrics.items():
        self._log(f"{entry_pad}{label}: {render(val)}")

section(title)

Print a major section header.

Parameters:

Name Type Description Default
title str

Section title

required

Examples:

>>> logger.section("ANALYSIS PIPELINE")
Source code in src/lacuna/utils/logging.py
def section(self, title: str) -> None:
    """
    Log a major section header.

    Emits an empty message, then ``title`` framed above and below by a rule
    of ``=`` characters ``self.width`` long. Does nothing in silent mode.

    Parameters
    ----------
    title : str
        Section title

    Examples
    --------
    >>> logger.section("ANALYSIS PIPELINE")
    """
    if not self.verbose:
        return
    rule = "=" * self.width
    for line in ("", rule, title, rule):
        self._log(line)

subsection(title)

Print a minor subsection header.

Parameters:

Name Type Description Default
title str

Subsection title

required

Examples:

>>> logger.subsection("Loading data")
Source code in src/lacuna/utils/logging.py
def subsection(self, title: str) -> None:
    """
    Log a minor subsection header.

    Emits an empty message, then ``title`` framed above and below by a rule
    of ``-`` characters ``self.width`` long. Does nothing in silent mode.

    Parameters
    ----------
    title : str
        Subsection title

    Examples
    --------
    >>> logger.subsection("Loading data")
    """
    if not self.verbose:
        return
    rule = "-" * self.width
    for line in ("", rule, title, rule):
        self._log(line)

success(message, details=None, indent_level=0)

Print a success message with optional details.

Parameters:

Name Type Description Default
message str

Success message

required
details dict

Dictionary of key-value pairs to display

None
indent_level int

Indentation level

0

Examples:

>>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})
Source code in src/lacuna/utils/logging.py
def success(
    self,
    message: str,
    details: dict | None = None,
    indent_level: int = 0,
) -> None:
    """
    Print a success message with optional details.

    Parameters
    ----------
    message : str
        Success message
    details : dict, optional
        Dictionary of key-value pairs to display
    indent_level : int, default=0
        Indentation level

    Examples
    --------
    >>> logger.success("Analysis complete", details={"time": 42.3, "subjects": 10})
    """
    indent = self.indent * indent_level
    self._log(f"{indent}{message}")

    if details and self.verbose:
        detail_indent = self.indent * (indent_level + 1)
        for key, value in details.items():
            # Format numbers nicely
            if isinstance(value, float):
                formatted_value = f"{value:.2f}"
            elif isinstance(value, int) and value >= 1000:
                formatted_value = f"{value:,}"
            else:
                formatted_value = str(value)

            self._log(f"{detail_indent}{key}: {formatted_value}")

warning(message, indent_level=0)

Print a warning message.

Parameters:

Name Type Description Default
message str

Warning message

required
indent_level int

Indentation level

0

Examples:

>>> logger.warning("Mask size smaller than expected")
Source code in src/lacuna/utils/logging.py
def warning(self, message: str, indent_level: int = 0) -> None:
    """
    Log ``message`` at WARNING level.

    Parameters
    ----------
    message : str
        Warning message
    indent_level : int, default=0
        Number of times the indentation string is prepended

    Examples
    --------
    >>> logger.warning("Mask size smaller than expected")
    """
    padding = self.indent * indent_level
    self._log("{}{}".format(padding, message), level=logging.WARNING)

create_connectome_batches(gsp_dir, mask_path, output_dir, subjects_per_batch=50, pattern='sub-*/func/*bld001_rest_*_finalmask.nii.gz', verbose=False)

Create HDF5 batch files from GSP1000 functional data.

Scans a directory of functional NIfTI files, extracts time-series from within a brain mask, and saves the data into multiple smaller HDF5 batch files optimized for memory-efficient lesion network mapping.

Parameters:

Name Type Description Default
gsp_dir str or Path

Directory containing GSP1000 subject functional data. Expected structure: `sub-*/func/*bld001_rest_*_finalmask.nii.gz`

required
mask_path str or Path

Path to brain mask NIfTI file (e.g., MNI152_T1_2mm_Brain_Mask.nii.gz). Defines which voxels to extract.

required
output_dir str or Path

Directory where HDF5 batch files will be saved.

required
subjects_per_batch int

Number of subjects to include in each batch file. Larger batches = fewer files but more memory per batch.

50
pattern str

Glob pattern to find functional files within gsp_dir. Default matches standard GSP1000 structure.

'sub-*/func/*bld001_rest_*_finalmask.nii.gz'
verbose bool

Print progress information.

False

Returns:

Type Description
list of Path

Paths to created HDF5 batch files, sorted by name.

Notes

Each HDF5 batch file contains: - 'timeseries': (n_subjects, n_timepoints, n_voxels) float32 array - 'mask_indices': (3, n_voxels) array of mask coordinates - 'mask_affine': (4, 4) affine transformation matrix - Attributes: n_subjects, n_timepoints, n_voxels, mask_shape

The batch files are designed for sequential loading during analysis, minimizing memory footprint while maintaining processing speed.

Examples:

>>> from lacuna.utils.gsp1000 import create_connectome_batches
>>> batch_files = create_connectome_batches(
...     gsp_dir="/data/GSP1000",
...     mask_path="/data/MNI152_T1_2mm_Brain_Mask.nii.gz",
...     output_dir="/data/connectomes/gsp1000_batches",
...     subjects_per_batch=100
... )
>>> print(f"Created {len(batch_files)} batch files")
Source code in src/lacuna/utils/gsp1000.py
def create_connectome_batches(
    gsp_dir: str | Path,
    mask_path: str | Path,
    output_dir: str | Path,
    subjects_per_batch: int = 50,
    pattern: str = "sub-*/func/*bld001_rest_*_finalmask.nii.gz",
    verbose: bool = False,
) -> list[Path]:
    """Create HDF5 batch files from GSP1000 functional data.

    Scans a directory of functional NIfTI files, extracts time-series from
    within a brain mask, and saves the data into multiple smaller HDF5 batch
    files optimized for memory-efficient lesion network mapping.

    Parameters
    ----------
    gsp_dir : str or Path
        Directory containing GSP1000 subject functional data.
        Expected structure: sub-*/func/*bld001_rest_*_finalmask.nii.gz
    mask_path : str or Path
        Path to brain mask NIfTI file (e.g., MNI152_T1_2mm_Brain_Mask.nii.gz).
        Defines which voxels to extract.
    output_dir : str or Path
        Directory where HDF5 batch files will be saved.
    subjects_per_batch : int, default=50
        Number of subjects to include in each batch file.
        Larger batches = fewer files but more memory per batch.
    pattern : str, optional
        Glob pattern to find functional files within gsp_dir.
        Default matches standard GSP1000 structure.
    verbose : bool, default=True
        Print progress information.

    Returns
    -------
    list of Path
        Paths to created HDF5 batch files, sorted by name.

    Notes
    -----
    Each HDF5 batch file contains:
    - 'timeseries': (n_subjects, n_timepoints, n_voxels) float32 array
    - 'mask_indices': (3, n_voxels) array of mask coordinates
    - 'mask_affine': (4, 4) affine transformation matrix
    - Attributes: n_subjects, n_timepoints, n_voxels, mask_shape

    The batch files are designed for sequential loading during analysis,
    minimizing memory footprint while maintaining processing speed.

    Examples
    --------
    >>> from lacuna.utils.gsp1000 import create_connectome_batches
    >>> batch_files = create_connectome_batches(
    ...     gsp_dir="/data/GSP1000",
    ...     mask_path="/data/MNI152_T1_2mm_Brain_Mask.nii.gz",
    ...     output_dir="/data/connectomes/gsp1000_batches",
    ...     subjects_per_batch=100
    ... )
    >>> print(f"Created {len(batch_files)} batch files")
    """
    gsp_dir = Path(gsp_dir)
    mask_path = Path(mask_path)
    output_dir = Path(output_dir)

    if verbose:
        print("🚀 Creating connectome batch files from GSP1000 data...")

    # 1. Find all functional NIfTI files
    search_pattern = str(gsp_dir / pattern)
    all_subject_files = sorted(glob.glob(search_pattern))

    if not all_subject_files:
        raise FileNotFoundError(
            f"No NIfTI files found matching pattern: {search_pattern}\n"
            f"Expected structure: {gsp_dir}/sub-*/func/*bld001_rest_*_finalmask.nii.gz"
        )

    n_total_subjects = len(all_subject_files)
    if verbose:
        print(f"✓ Found {n_total_subjects} subject files")

    # 2. Load brain mask and extract metadata
    if verbose:
        print(f"Loading brain mask: {mask_path}")
    mask_img = nib.load(mask_path)
    mask_data = mask_img.get_fdata().astype(bool)
    mask_affine = mask_img.affine
    in_mask_indices = np.where(mask_data)
    n_voxels = len(in_mask_indices[0])

    # Get number of timepoints from first subject
    first_img = nib.load(all_subject_files[0])
    n_timepoints = first_img.shape[3]

    if verbose:
        print(f"✓ Mask contains {n_voxels:,} in-brain voxels")
        print(f"✓ Detected {n_timepoints} timepoints per subject")

    # 3. Split subjects into batches
    subject_batches = [
        all_subject_files[i : i + subjects_per_batch]
        for i in range(0, n_total_subjects, subjects_per_batch)
    ]
    n_batches = len(subject_batches)

    if verbose:
        print(f"✓ Will create {n_batches} batch files ({subjects_per_batch} subjects/batch)")

    output_dir.mkdir(parents=True, exist_ok=True)
    created_files = []

    # 4. Process each batch
    progress_iter = tqdm(subject_batches, desc="Creating batches") if verbose else subject_batches

    for batch_idx, batch_files in enumerate(progress_iter):
        batch_filename = output_dir / f"connectome_batch_{batch_idx:03d}.h5"
        n_subjects_in_batch = len(batch_files)

        with h5py.File(batch_filename, "w") as hf:
            # Create dataset for this batch
            timeseries_dset = hf.create_dataset(
                "timeseries",
                shape=(n_subjects_in_batch, n_timepoints, n_voxels),
                dtype=np.float32,
                chunks=(1, n_timepoints, n_voxels),
                compression="gzip",
                compression_opts=1,  # Fast compression
            )

            # Store metadata (makes each batch self-contained)
            hf.create_dataset("mask_indices", data=np.vstack(in_mask_indices))
            hf.create_dataset("mask_affine", data=mask_affine)
            hf.attrs["n_subjects"] = n_subjects_in_batch
            hf.attrs["n_timepoints"] = n_timepoints
            hf.attrs["n_voxels"] = n_voxels
            hf.attrs["mask_shape"] = mask_data.shape

            # Process subjects in current batch
            for subj_idx, func_path in enumerate(batch_files):
                func_img = nib.load(func_path)
                func_data = func_img.get_fdata()

                # Extract in-mask voxels and transpose to (timepoints, voxels)
                subject_timeseries = func_data[in_mask_indices].T

                # Save to HDF5
                timeseries_dset[subj_idx, :, :] = subject_timeseries

        created_files.append(batch_filename)

    if verbose:
        print(f"\n✅ Created {len(created_files)} batch files in {output_dir}")
        total_size_mb = sum(f.stat().st_size for f in created_files) / (1024**2)
        print(f"✓ Total size: {total_size_mb:.1f} MB")

    return created_files

format_suggestions(suggestions)

Format a list of suggestions for inclusion in an error message.

Parameters:

Name Type Description Default
suggestions list[str]

List of suggested strings.

required

Returns:

Type Description
str

Formatted string for error message, or empty string if no suggestions.

Examples:

>>> format_suggestions(["rmap"])
"Did you mean 'rmap'?"
>>> format_suggestions(["rmap", "zscoremap"])
"Did you mean one of: 'rmap', 'zscoremap'?"
>>> format_suggestions([])
''
Source code in src/lacuna/utils/suggestions.py
def format_suggestions(suggestions: list[str]) -> str:
    """
    Turn a list of suggestions into a "Did you mean ...?" phrase.

    Parameters
    ----------
    suggestions : list[str]
        List of suggested strings.

    Returns
    -------
    str
        ``"Did you mean 'x'?"`` for a single suggestion,
        ``"Did you mean one of: 'x', 'y'?"`` for several, and an empty
        string when there are none.

    Examples
    --------
    >>> format_suggestions(["rmap"])
    "Did you mean 'rmap'?"

    >>> format_suggestions(["rmap", "zscoremap"])
    "Did you mean one of: 'rmap', 'zscoremap'?"

    >>> format_suggestions([])
    ''
    """
    if not suggestions:
        return ""

    if len(suggestions) != 1:
        choices = ", ".join(map("'{}'".format, suggestions))
        return f"Did you mean one of: {choices}?"

    only = suggestions[0]
    return f"Did you mean '{only}'?"

log_error(message, verbose=False)

Print an error message.

Source code in src/lacuna/utils/logging.py
def log_error(message: str, verbose: bool = False) -> None:
    """Log ``message`` at ERROR level via a temporary ``ConsoleLogger``.

    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose).error(message)

log_info(message, verbose=False)

Print an info message.

Source code in src/lacuna/utils/logging.py
def log_info(message: str, verbose: bool = False) -> None:
    """Log ``message`` as an informational line via a temporary ``ConsoleLogger``.

    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose).info(message)

log_progress(message, current=None, total=None, verbose=False)

Print a progress message.

Source code in src/lacuna/utils/logging.py
def log_progress(
    message: str,
    current: int | None = None,
    total: int | None = None,
    verbose: bool = False,
) -> None:
    """Log a progress line via a temporary ``ConsoleLogger``.

    ``current`` and ``total`` are forwarded to ``ConsoleLogger.progress``.
    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose).progress(message, current=current, total=total)

log_section(title, width=70, verbose=False)

Print a section header.

Source code in src/lacuna/utils/logging.py
def log_section(title: str, width: int = 70, verbose: bool = False) -> None:
    """Log a section header of the given ``width`` via a temporary ``ConsoleLogger``.

    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose, width=width).section(title)

log_success(message, details=None, verbose=False)

Print a success message.

Source code in src/lacuna/utils/logging.py
def log_success(message: str, details: dict | None = None, verbose: bool = False) -> None:
    """Log a success message, with optional ``details``, via a temporary ``ConsoleLogger``.

    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose).success(message, details=details)

log_warning(message, verbose=False)

Print a warning message.

Source code in src/lacuna/utils/logging.py
def log_warning(message: str, verbose: bool = False) -> None:
    """Log ``message`` at WARNING level via a temporary ``ConsoleLogger``.

    Nothing is emitted unless ``verbose`` is true.
    """
    ConsoleLogger(verbose=verbose).warning(message)

suggest_similar(query, candidates, max_suggestions=3, min_similarity=0.4)

Find candidates most similar to the query string.

Uses difflib.SequenceMatcher for similarity scoring. Results are sorted by similarity (most similar first) and filtered by minimum threshold.

Parameters:

Name Type Description Default
query str

The string to find matches for (e.g., user's typo).

required
candidates list[str]

Available options to suggest from.

required
max_suggestions int

Maximum number of suggestions to return.

3
min_similarity float

Minimum similarity ratio (0.0 to 1.0) to include a suggestion. Higher values require closer matches.

0.4

Returns:

Type Description
list[str]

Up to max_suggestions similar candidates, sorted by similarity. Empty list if no candidates meet the minimum similarity threshold.

Examples:

>>> available = ["rmap", "zscoremap", "damagescore"]
>>> suggest_similar("rmp", available)
['rmap', 'zscoremap']
>>> suggest_similar("score", available)
['zscoremap', 'damagescore']
>>> suggest_similar("xyz", available, min_similarity=0.5)
[]  # No close matches
>>> # Case-insensitive matching
>>> suggest_similar("Rmap", available)
['rmap', 'zscoremap']
Source code in src/lacuna/utils/suggestions.py
def suggest_similar(
    query: str,
    candidates: list[str],
    max_suggestions: int = 3,
    min_similarity: float = 0.4,
) -> list[str]:
    """
    Rank candidates by how closely they resemble the query string.

    Each candidate is scored against the query with
    ``difflib.SequenceMatcher(...).ratio()`` after lower-casing both sides.
    Candidates scoring below ``min_similarity`` are dropped; the rest are
    ordered best-first, with ties broken alphabetically.

    Parameters
    ----------
    query : str
        The string to find matches for (e.g., user's typo).
    candidates : list[str]
        Available options to suggest from.
    max_suggestions : int, default=3
        Maximum number of suggestions to return.
    min_similarity : float, default=0.4
        Minimum similarity ratio (0.0 to 1.0) a candidate must reach.
        Higher values require closer matches.

    Returns
    -------
    list[str]
        At most `max_suggestions` candidates in their original casing,
        most similar first. Empty when `candidates` is empty or nothing
        reaches the threshold.

    Examples
    --------
    >>> available = ["rmap", "zscoremap", "damagescore"]
    >>> suggest_similar("score", available)
    ['zscoremap', 'damagescore']

    >>> # Scoring ignores case
    >>> suggest_similar("Rmap", available, max_suggestions=1)
    ['rmap']

    >>> # No close matches
    >>> suggest_similar("xyz", available, min_similarity=0.5)
    []
    """
    if not candidates:
        return []

    needle = query.lower()

    def similarity(option: str) -> float:
        # Both sides are lower-cased so the score ignores case.
        return SequenceMatcher(None, needle, option.lower()).ratio()

    # Keep (score, candidate) pairs that reach the threshold.
    matches = [
        (score, option)
        for option in candidates
        if (score := similarity(option)) >= min_similarity
    ]

    # Highest score first; equal scores fall back to alphabetical order.
    ranked = sorted(matches, key=lambda pair: (-pair[0], pair[1]))

    return [option for _, option in ranked[:max_suggestions]]

validate_connectome_batches(batch_dir, verbose=False)

Validate integrity of HDF5 connectome batch files.

Parameters:

Name Type Description Default
batch_dir str or Path

Directory containing HDF5 batch files.

required
verbose bool

Print validation results.

False

Returns:

Type Description
dict

Validation summary with keys: n_batches, total_subjects, n_timepoints, n_voxels, mask_shape, consistent, errors

Source code in src/lacuna/utils/gsp1000.py
def validate_connectome_batches(batch_dir: str | Path, verbose: bool = False) -> dict:
    """Validate integrity of HDF5 connectome batch files.

    Parameters
    ----------
    batch_dir : str or Path
        Directory containing HDF5 batch files.
    verbose : bool, default=True
        Print validation results.

    Returns
    -------
    dict
        Validation summary with keys: n_batches, total_subjects, n_timepoints,
        n_voxels, mask_shape, consistent, errors
    """
    batch_dir = Path(batch_dir)
    batch_files = sorted(batch_dir.glob("*.h5"))

    if not batch_files:
        raise FileNotFoundError(f"No HDF5 files found in {batch_dir}")

    errors = []
    total_subjects = 0
    reference_metadata = None

    if verbose:
        print(f"Validating {len(batch_files)} batch files...")

    for batch_file in batch_files:
        try:
            with h5py.File(batch_file, "r") as hf:
                # Check required datasets
                required = ["timeseries", "mask_indices", "mask_affine"]
                for key in required:
                    if key not in hf:
                        errors.append(f"{batch_file.name}: Missing dataset '{key}'")

                # Extract metadata
                n_subjects = hf.attrs["n_subjects"]
                n_timepoints = hf.attrs["n_timepoints"]
                n_voxels = hf.attrs["n_voxels"]
                mask_shape = tuple(hf.attrs["mask_shape"])

                total_subjects += n_subjects

                # Check consistency with first batch
                if reference_metadata is None:
                    reference_metadata = {
                        "n_timepoints": n_timepoints,
                        "n_voxels": n_voxels,
                        "mask_shape": mask_shape,
                    }
                else:
                    if n_timepoints != reference_metadata["n_timepoints"]:
                        errors.append(
                            f"{batch_file.name}: Inconsistent n_timepoints "
                            f"({n_timepoints} vs {reference_metadata['n_timepoints']})"
                        )
                    if n_voxels != reference_metadata["n_voxels"]:
                        errors.append(
                            f"{batch_file.name}: Inconsistent n_voxels "
                            f"({n_voxels} vs {reference_metadata['n_voxels']})"
                        )

        except Exception as e:
            errors.append(f"{batch_file.name}: Error reading file - {e}")

    summary = {
        "n_batches": len(batch_files),
        "total_subjects": total_subjects,
        "consistent": len(errors) == 0,
        "errors": errors,
    }
    summary.update(reference_metadata or {})

    if verbose:
        if summary["consistent"]:
            print("✅ All batches valid!")
            print(f"  - {summary['n_batches']} batches")
            print(f"  - {summary['total_subjects']} total subjects")
            print(f"  - {summary['n_timepoints']} timepoints")
            print(f"  - {summary['n_voxels']:,} voxels")
        else:
            print(f"❌ Found {len(errors)} errors:")
            for error in errors:
                print(f"  - {error}")

    return summary