Skip to content

base

lacuna.io.downloaders.base

Base classes and registry for connectome downloaders.

This module provides: ConnectomeSource (configuration for a fetchable connectome); FetchConfig (user-specified fetch configuration); FetchProgress (progress tracking for downloads); FetchResult (outcome of a fetch operation); CONNECTOME_SOURCES (registry of available connectomes); and get_api_key (API key resolution helper).

BaseDownloader

Bases: ABC

Abstract base class for downloaders.

Source code in src/lacuna/io/downloaders/base.py
class BaseDownloader(ABC):
    """
    Abstract base class for downloaders.

    Holds the source configuration supplied at construction time and
    declares the abstract ``download`` method that every concrete
    downloader must implement.

    Attributes
    ----------
    source : ConnectomeSource
        The configuration object passed to ``__init__``.
    """

    def __init__(self, source: ConnectomeSource) -> None:
        """
        Initialize downloader with source configuration.

        Parameters
        ----------
        source : ConnectomeSource
            Configuration for the connectome source. Stored unchanged as
            ``self.source``.
        """
        self.source = source

    @abstractmethod
    def download(
        self,
        output_path: Path,
        progress_callback: Callable[[FetchProgress], None] | None = None,
    ) -> list[Path]:
        """
        Download files to output path.

        Declared abstract: the base implementation has no behavior, so a
        subclass must override this method before it can be instantiated.

        Parameters
        ----------
        output_path : Path
            Directory to download files to.
        progress_callback : callable, optional
            Function called with FetchProgress updates. Defaults to None.

        Returns
        -------
        list[Path]
            List of downloaded file paths.
        """
        ...

__init__(source)

Initialize downloader with source configuration.

Parameters:

Name Type Description Default
source ConnectomeSource

Configuration for the connectome source.

required
Source code in src/lacuna/io/downloaders/base.py
def __init__(self, source: ConnectomeSource) -> None:
    """
    Initialize downloader with source configuration.

    Parameters
    ----------
    source : ConnectomeSource
        Configuration for the connectome source. Stored unchanged as
        ``self.source``.
    """
    self.source = source

download(output_path, progress_callback=None) abstractmethod

Download files to output path.

Parameters:

Name Type Description Default
output_path Path

Directory to download files to.

required
progress_callback callable

Function called with FetchProgress updates.

None

Returns:

Type Description
list[Path]

List of downloaded file paths.

Source code in src/lacuna/io/downloaders/base.py
@abstractmethod
def download(
    self,
    output_path: Path,
    progress_callback: Callable[[FetchProgress], None] | None = None,
) -> list[Path]:
    """
    Download files to output path.

    Declared abstract: this definition has no behavior of its own and
    must be overridden by concrete downloaders.

    Parameters
    ----------
    output_path : Path
        Directory to download files to.
    progress_callback : callable, optional
        Function called with FetchProgress updates. Defaults to None.

    Returns
    -------
    list[Path]
        List of downloaded file paths.
    """
    ...

ConnectomeSource dataclass

Configuration for a fetchable connectome source.

Source code in src/lacuna/io/downloaders/base.py
@dataclass
class ConnectomeSource:
    """
    Configuration for a fetchable connectome source.

    The first five fields (``name``, ``display_name``, ``type``,
    ``description``, ``source_type``) are required. All remaining fields
    have defaults and are grouped below by purpose: Dataverse-specific,
    Figshare-specific, processing, and metadata.
    """

    # Required fields (no defaults)
    name: str
    """Unique identifier (e.g., 'gsp1000', 'dtor985')."""

    display_name: str
    """Human-readable name (e.g., 'GSP1000 Functional Connectome')."""

    type: Literal["functional", "structural"]
    """Connectome type determining processing pipeline."""

    description: str
    """User-facing description of the connectome."""

    source_type: Literal["dataverse", "figshare", "github"]
    """Download source requiring specific authentication/handling."""

    # NOTE(review): no github-specific fields are declared in this class;
    # confirm how 'github' sources locate their files.

    # Dataverse-specific
    persistent_id: str | None = None
    """DOI for Dataverse datasets (e.g., 'doi:10.7910/DVN/ILXIKS')."""

    dataverse_server: str = "https://dataverse.harvard.edu"
    """Dataverse server URL."""

    # Figshare-specific
    download_url: str | None = None
    """Direct download URL for Figshare files (deprecated, use article_id)."""

    article_id: int | None = None
    """Figshare article ID for API-based downloads."""

    # Processing
    default_batches: int = 10
    """Default number of HDF5 batches (functional only)."""

    requires_mask: bool = False
    """Whether brain mask is needed for processing."""

    mask_url: str | None = None
    """URL to download brain mask if required."""

    # Metadata (informational; defaults are placeholder values)
    n_subjects: int = 0
    """Number of subjects in the connectome."""

    space: str = "MNI152NLin6Asym"
    """Coordinate space."""

    estimated_size_gb: float = 0.0
    """Estimated download size in GB for user information."""

    citation: str = ""
    """Citation text for this connectome dataset."""

article_id = None class-attribute instance-attribute

Figshare article ID for API-based downloads.

citation = '' class-attribute instance-attribute

Citation text for this connectome dataset.

dataverse_server = 'https://dataverse.harvard.edu' class-attribute instance-attribute

Dataverse server URL.

default_batches = 10 class-attribute instance-attribute

Default number of HDF5 batches (functional only).

description instance-attribute

User-facing description of the connectome.

display_name instance-attribute

Human-readable name (e.g., 'GSP1000 Functional Connectome').

download_url = None class-attribute instance-attribute

Direct download URL for Figshare files (deprecated, use article_id).

estimated_size_gb = 0.0 class-attribute instance-attribute

Estimated download size in GB for user information.

mask_url = None class-attribute instance-attribute

URL to download brain mask if required.

n_subjects = 0 class-attribute instance-attribute

Number of subjects in the connectome.

name instance-attribute

Unique identifier (e.g., 'gsp1000', 'dtor985').

persistent_id = None class-attribute instance-attribute

DOI for Dataverse datasets (e.g., 'doi:10.7910/DVN/ILXIKS').

requires_mask = False class-attribute instance-attribute

Whether brain mask is needed for processing.

source_type instance-attribute

Download source requiring specific authentication/handling.

space = 'MNI152NLin6Asym' class-attribute instance-attribute

Coordinate space.

type instance-attribute

Connectome type determining processing pipeline.

DownloaderProtocol

Bases: Protocol

Protocol for source-specific downloaders.

Source code in src/lacuna/io/downloaders/base.py
class DownloaderProtocol(Protocol):
    """
    Protocol for source-specific downloaders.

    Describes the single ``download`` method a downloader object is
    expected to provide. The method body here is a placeholder only.
    """

    def download(
        self,
        output_path: Path,
        progress_callback: Callable[[FetchProgress], None] | None = None,
    ) -> list[Path]:
        """
        Download files to output path.

        Parameters
        ----------
        output_path : Path
            Destination path for the downloaded files.
        progress_callback : callable, optional
            Callable accepting a FetchProgress. Defaults to None.

        Returns
        -------
        list[Path]
            Paths of the downloaded files.
        """
        ...

download(output_path, progress_callback=None)

Download files to output path.

Source code in src/lacuna/io/downloaders/base.py
def download(
    self,
    output_path: Path,
    progress_callback: Callable[[FetchProgress], None] | None = None,
) -> list[Path]:
    """
    Download files to output path.

    Placeholder signature only; the body carries no behavior.

    Parameters
    ----------
    output_path : Path
        Destination path for the downloaded files.
    progress_callback : callable, optional
        Callable accepting a FetchProgress. Defaults to None.

    Returns
    -------
    list[Path]
        Paths of the downloaded files.
    """
    ...

FetchConfig dataclass

Configuration for a connectome fetch operation.

Source code in src/lacuna/io/downloaders/base.py
@dataclass
class FetchConfig:
    """
    Configuration for a connectome fetch operation.

    Only ``connectome`` and ``output_dir`` are required; every other field
    has a default. ``from_cli_args`` builds an instance from a parsed
    ``argparse.Namespace``.
    """

    connectome: str
    """Connectome name to fetch (e.g., 'gsp1000', 'dtor985')."""

    output_dir: Path
    """Directory for processed output files."""

    # Authentication
    api_key: str | None = None
    """Dataverse API key (for GSP1000). Can also use DATAVERSE_API_KEY env var."""

    # Processing options
    batches: int = 10
    """Number of HDF5 batch files for functional connectomes."""

    keep_original: bool = True
    """Keep original downloaded files after processing."""

    # Registration
    register: bool = True
    """Automatically register connectome after processing."""

    register_name: str | None = None
    """Custom name for registration. Defaults to source name (e.g., 'GSP1000')."""

    # Behavior
    force: bool = False
    """Overwrite existing files and registrations."""

    resume: bool = True
    """Resume interrupted downloads."""

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace) -> FetchConfig:
        """
        Create config from CLI arguments.

        An attribute that is missing from ``args`` or present with the
        value ``None`` falls back to the field default. argparse stores
        ``None`` for options that were not given on the command line
        (unless the parser declares another default), so a plain
        ``getattr`` fallback would let ``None`` through and, for example,
        make ``Path(None)`` raise ``TypeError``.

        Parameters
        ----------
        args : argparse.Namespace
            Parsed arguments. Attributes read: ``connectome``,
            ``output_dir``, ``api_key``, ``batches``, ``no_keep_original``,
            ``no_register``, ``register_name``, ``force``, ``resume``.

        Returns
        -------
        FetchConfig
            Configuration populated from ``args``.
        """

        def option(name, fallback=None):
            # Treat an explicit None exactly like a missing attribute.
            value = getattr(args, name, None)
            return fallback if value is None else value

        return cls(
            connectome=option("connectome", ""),
            output_dir=Path(option("output_dir", ".")),
            api_key=option("api_key"),
            batches=option("batches", 10),
            # The CLI exposes negative flags; invert them for the config.
            keep_original=not option("no_keep_original", False),
            register=not option("no_register", False),
            register_name=option("register_name"),
            force=option("force", False),
            resume=option("resume", True),
        )

    def get_api_key(self) -> str | None:
        """
        Get API key from the ``api_key`` field, env var, or config file.

        Sources are tried in that order; the first non-empty value wins.

        Returns
        -------
        str or None
            ``self.api_key`` if set, otherwise the ``DATAVERSE_API_KEY``
            environment variable if set, otherwise whatever
            ``_load_config_file_key()`` returns.
        """
        return (
            self.api_key
            or os.environ.get("DATAVERSE_API_KEY")
            or _load_config_file_key()
        )

api_key = None class-attribute instance-attribute

Dataverse API key (for GSP1000). Can also use DATAVERSE_API_KEY env var.

batches = 10 class-attribute instance-attribute

Number of HDF5 batch files for functional connectomes.

connectome instance-attribute

Connectome name to fetch (e.g., 'gsp1000', 'dtor985').

force = False class-attribute instance-attribute

Overwrite existing files and registrations.

keep_original = True class-attribute instance-attribute

Keep original downloaded files after processing.

output_dir instance-attribute

Directory for processed output files.

register = True class-attribute instance-attribute

Automatically register connectome after processing.

register_name = None class-attribute instance-attribute

Custom name for registration. Defaults to source name (e.g., 'GSP1000').

resume = True class-attribute instance-attribute

Resume interrupted downloads.

from_cli_args(args) classmethod

Create config from CLI arguments.

Source code in src/lacuna/io/downloaders/base.py
@classmethod
def from_cli_args(cls, args: argparse.Namespace) -> FetchConfig:
    """
    Create config from CLI arguments.

    An attribute that is missing from ``args`` or present with the value
    ``None`` falls back to the field default. argparse stores ``None`` for
    options that were not given on the command line (unless the parser
    declares another default), so a plain ``getattr`` fallback would let
    ``None`` through and, for example, make ``Path(None)`` raise
    ``TypeError``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments. Attributes read: ``connectome``, ``output_dir``,
        ``api_key``, ``batches``, ``no_keep_original``, ``no_register``,
        ``register_name``, ``force``, ``resume``.

    Returns
    -------
    FetchConfig
        Configuration populated from ``args``.
    """

    def option(name, fallback=None):
        # Treat an explicit None exactly like a missing attribute.
        value = getattr(args, name, None)
        return fallback if value is None else value

    return cls(
        connectome=option("connectome", ""),
        output_dir=Path(option("output_dir", ".")),
        api_key=option("api_key"),
        batches=option("batches", 10),
        # The CLI exposes negative flags; invert them for the config.
        keep_original=not option("no_keep_original", False),
        register=not option("no_register", False),
        register_name=option("register_name"),
        force=option("force", False),
        resume=option("resume", True),
    )

get_api_key()

Get API key from the config's api_key field, the DATAVERSE_API_KEY environment variable, or the config file (checked in that order).

Source code in src/lacuna/io/downloaders/base.py
def get_api_key(self) -> str | None:
    """
    Resolve the API key from the instance, the environment, or the config file.

    Sources are tried in order and the first non-empty value wins:
    ``self.api_key``, then the ``DATAVERSE_API_KEY`` environment variable,
    then the result of ``_load_config_file_key()``.
    """
    return (
        self.api_key
        or os.environ.get("DATAVERSE_API_KEY")
        or _load_config_file_key()
    )

FetchProgress dataclass

Progress information for fetch operations.

Source code in src/lacuna/io/downloaders/base.py
@dataclass
class FetchProgress:
    """
    Snapshot of how far a fetch operation has progressed.

    Carries file-level counters for the whole operation and byte-level
    counters for the file currently being downloaded, plus two derived
    percentage properties.
    """

    phase: Literal["download", "processing", "registration"]
    """Current operation phase."""

    current_file: str
    """Name of file currently being processed."""

    files_completed: int
    """Number of files completed."""

    files_total: int
    """Total number of files to process."""

    bytes_transferred: int = 0
    """Bytes transferred in current download."""

    bytes_total: int = 0
    """Total bytes for current download."""

    message: str = ""
    """Human-readable status message."""

    @property
    def percent_complete(self) -> float:
        """Share of files completed, as a percentage (0.0 when no files are expected)."""
        total = self.files_total
        return (self.files_completed / total) * 100 if total != 0 else 0.0

    @property
    def download_percent(self) -> float:
        """Share of the current file's bytes transferred, as a percentage (0.0 when the size is 0)."""
        total = self.bytes_total
        return (self.bytes_transferred / total) * 100 if total != 0 else 0.0

bytes_total = 0 class-attribute instance-attribute

Total bytes for current download.

bytes_transferred = 0 class-attribute instance-attribute

Bytes transferred in current download.

current_file instance-attribute

Name of file currently being processed.

download_percent property

Current file download percentage.

files_completed instance-attribute

Number of files completed.

files_total instance-attribute

Total number of files to process.

message = '' class-attribute instance-attribute

Human-readable status message.

percent_complete property

Overall percentage completion.

phase instance-attribute

Current operation phase.

FetchResult dataclass

Result of a connectome fetch operation.

Source code in src/lacuna/io/downloaders/base.py
@dataclass
class FetchResult:
    """
    Outcome of a connectome fetch operation.

    Records whether the fetch succeeded, where the output went, whether
    the connectome was registered, timing information, and any warnings
    or error message. ``summary`` renders the result as text.
    """

    success: bool
    """Whether the operation completed successfully."""

    connectome_name: str
    """Name of the fetched connectome."""

    output_dir: Path
    """Directory containing processed files."""

    output_files: list[Path] = field(default_factory=list)
    """List of created output files."""

    registered: bool = False
    """Whether the connectome was registered."""

    register_name: str | None = None
    """Name used for registration, or None if not registered."""

    duration_seconds: float = 0.0
    """Total operation time in seconds."""

    download_time_seconds: float = 0.0
    """Time spent downloading."""

    processing_time_seconds: float = 0.0
    """Time spent processing."""

    warnings: list[str] = field(default_factory=list)
    """Non-fatal warnings encountered."""

    error: str | None = None
    """Error message if success=False."""

    def summary(self) -> str:
        """
        Render the result as a human-readable report.

        Returns a one-line failure message when ``success`` is false,
        otherwise a multi-line report with output location, file count,
        registration name, and download/processing times.
        """
        # Failure path first: a single line carrying the error text.
        if not self.success:
            return f"❌ Failed to fetch {self.connectome_name}: {self.error}"

        report = (
            f"✅ Successfully fetched {self.connectome_name}\n"
            f"   Output: {self.output_dir}\n"
            f"   Files: {len(self.output_files)}\n"
            f"   Registered as: {self.register_name or 'not registered'}\n"
            f"   Time: {self.download_time_seconds:.1f}s download, "
            f"{self.processing_time_seconds:.1f}s processing"
        )
        return report

connectome_name instance-attribute

Name of the fetched connectome.

download_time_seconds = 0.0 class-attribute instance-attribute

Time spent downloading.

duration_seconds = 0.0 class-attribute instance-attribute

Total operation time in seconds.

error = None class-attribute instance-attribute

Error message if success=False.

output_dir instance-attribute

Directory containing processed files.

output_files = field(default_factory=list) class-attribute instance-attribute

List of created output files.

processing_time_seconds = 0.0 class-attribute instance-attribute

Time spent processing.

register_name = None class-attribute instance-attribute

Name used for registration, or None if not registered.

registered = False class-attribute instance-attribute

Whether the connectome was registered.

success instance-attribute

Whether the operation completed successfully.

warnings = field(default_factory=list) class-attribute instance-attribute

Non-fatal warnings encountered.

summary()

Generate human-readable summary.

Source code in src/lacuna/io/downloaders/base.py
def summary(self) -> str:
    """
    Render the result as a human-readable report.

    Returns a one-line failure message when ``success`` is false,
    otherwise a multi-line report with output location, file count,
    registration name, and download/processing times.
    """
    # Failure path first: a single line carrying the error text.
    if not self.success:
        return f"❌ Failed to fetch {self.connectome_name}: {self.error}"

    report = (
        f"✅ Successfully fetched {self.connectome_name}\n"
        f"   Output: {self.output_dir}\n"
        f"   Files: {len(self.output_files)}\n"
        f"   Registered as: {self.register_name or 'not registered'}\n"
        f"   Time: {self.download_time_seconds:.1f}s download, "
        f"{self.processing_time_seconds:.1f}s processing"
    )
    return report

get_api_key(cli_key=None)

Get API key using priority order: CLI > env var > config file.

Parameters:

Name Type Description Default
cli_key str

API key provided via CLI argument.

None

Returns:

Type Description
str or None

The API key, or None if not found.

Source code in src/lacuna/io/downloaders/base.py
def get_api_key(cli_key: str | None = None) -> str | None:
    """
    Resolve the API key, preferring CLI over env var over config file.

    Parameters
    ----------
    cli_key : str, optional
        API key provided via CLI argument. An empty value is treated as
        not provided.

    Returns
    -------
    str or None
        The first non-empty value among ``cli_key`` and the
        ``DATAVERSE_API_KEY`` environment variable; otherwise the result
        of ``_load_config_file_key()``.
    """
    # `or` short-circuits, so later sources are consulted only when needed.
    return (
        cli_key
        or os.environ.get("DATAVERSE_API_KEY")
        or _load_config_file_key()
    )