Skip to content

glazing.cli.download

Download command implementation.

download

Download commands for the glazing CLI.

This module provides CLI commands for downloading linguistic datasets from their official sources with progress tracking and error handling.

Commands

download Download specific or all datasets.

Examples:

Download VerbNet: $ glazing download dataset --dataset verbnet

Download all datasets: $ glazing download dataset --dataset all

Download to specific directory: $ glazing download dataset --dataset propbank --output-dir /path/to/data

List available datasets: $ glazing download list

FUNCTION DESCRIPTION
dataset_command

Download a specific dataset or all datasets.

dataset_info

Get detailed information about a dataset.

download

Download datasets from official sources.

list_datasets

List available datasets for download.

Classes

Functions

dataset_command(dataset: str, output_dir: str | Path, force: bool) -> None

Download a specific dataset or all datasets.

Downloads the specified dataset(s) to the output directory.

PARAMETER DESCRIPTION
dataset

Dataset name to download ('all' for all datasets).

TYPE: str

output_dir

Output directory for downloaded datasets.

TYPE: str | Path

force

Force re-download even if dataset already exists.

TYPE: bool

Examples:

Download VerbNet: glazing download dataset --dataset verbnet

Download all datasets: glazing download dataset --dataset all --output-dir /data

Download with force: glazing download dataset --dataset framenet --force

Source code in src/glazing/cli/download.py
@download.command(name="dataset")
@click.option(
    "--dataset",
    "-d",
    type=click.Choice(["all", "verbnet", "propbank", "wordnet", "framenet"]),
    required=True,
    help="Dataset to download (all for all datasets)",
)
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(exists=False, file_okay=False, dir_okay=True),
    default=Path("data/raw"),
    help="Output directory for downloaded datasets",
)
@click.option(
    "--force",
    "-f",
    is_flag=True,
    help="Force re-download even if dataset already exists",
)
def dataset_command(dataset: str, output_dir: str | Path, force: bool) -> None:
    """Download a specific dataset or all datasets.

    Downloads the specified dataset(s) to the output directory.

    Parameters
    ----------
    dataset : str
        Dataset name to download ('all' for all datasets).
    output_dir : str | Path
        Output directory for downloaded datasets.
    force : bool
        Force re-download even if dataset already exists.

    Examples
    --------
    Download VerbNet:
        glazing download dataset --dataset verbnet

    Download all datasets:
        glazing download dataset --dataset all --output-dir /data

    Download with force:
        glazing download dataset --dataset framenet --force
    """
    # The docstring above is what click shows for ``--help``, so its wording
    # is user-facing output rather than internal documentation.

    # Work with an absolute directory from here on, whatever form the option
    # value arrived in (plain string from the CLI or the Path default).
    target_dir = Path(output_dir).resolve()

    # Make sure the destination exists before any downloader runs; a failure
    # here is reported on stderr and ends the command with exit status 1.
    try:
        target_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        click.echo(f"✗ Failed to create output directory: {exc}", err=True)
        ctx = click.get_current_context()
        ctx.exit(1)

    # Single-dataset requests are dispatched first; only the literal "all"
    # falls through to the bulk helper.
    if dataset != "all":
        _download_single_dataset(dataset, target_dir, force)
        return

    # NOTE(review): ``force`` is not forwarded on this path, so ``--force``
    # appears to have no effect together with ``--dataset all`` — confirm
    # against ``_download_all_datasets``.
    _download_all_datasets(target_dir)

dataset_info(dataset: str) -> None

Get detailed information about a dataset.

Shows version, download method, and other metadata for the specified dataset.

Examples: `glazing download info verbnet` and `glazing download info framenet`

Source code in src/glazing/cli/download.py
@download.command(name="info")
@click.argument("dataset", type=click.Choice(["verbnet", "propbank", "wordnet", "framenet"]))
def dataset_info(dataset: str) -> None:
    """Get detailed information about a dataset.

    Shows version, download method, and other metadata for the specified dataset.

    Examples:
        glazing download info verbnet
        glazing download info framenet
    """
    # The docstring above is what click shows for ``--help``, so its wording
    # is user-facing output rather than internal documentation.

    # Internal lookups are keyed by the lowercase dataset identifier.
    key = dataset.lower()

    # Human-readable spelling used in the first output line.
    pretty_names = {
        "verbnet": "VerbNet",
        "propbank": "PropBank",
        "wordnet": "WordNet",
        "framenet": "FrameNet",
    }
    pretty = pretty_names[key]

    # Per-dataset trailer lines (origin, then on-disk format), printed after
    # the generic metadata. A key missing from this table prints no trailer.
    trailer_lines = {
        "verbnet": (
            "Source: GitHub (uvi-nlp/verbnet)",
            "Format: XML classes with thematic roles and frames",
        ),
        "propbank": (
            "Source: GitHub (propbank/propbank-frames)",
            "Format: XML framesets with semantic roles",
        ),
        "wordnet": (
            "Source: Princeton University",
            "Format: Text files with synsets and relations",
        ),
        "framenet": (
            "Source: UC Berkeley ICSI",
            "Format: XML frames with lexical units and annotations",
        ),
    }

    try:
        meta = get_dataset_info(key)

        click.echo(f"Dataset: {pretty}")
        click.echo(f"Version: {meta['version']}")
        click.echo(f"Downloader: {meta['class']}")

        click.echo("Download: Automatic")

        for line in trailer_lines.get(key, ()):
            click.echo(line)

    except ValueError as exc:
        # Report the lookup failure on stderr and end with exit status 1.
        click.echo(f"Error getting dataset info: {exc}", err=True)
        ctx = click.get_current_context()
        ctx.exit(1)

download() -> None

Download datasets from official sources.

Downloads linguistic datasets including VerbNet, PropBank, WordNet, and FrameNet from their official sources.

Source code in src/glazing/cli/download.py
@click.group(name="download")
def download() -> None:
    """Download datasets from official sources.

    Downloads linguistic datasets including VerbNet, PropBank, WordNet,
    and FrameNet from their official sources.
    """
    # This function is only the container for the ``download`` command group;
    # the actual work lives in the subcommands registered on it with
    # ``@download.command(...)`` (``dataset``, ``info`` and ``list``).
    #
    # The docstring is rendered by click as the group's ``--help`` text. It no
    # longer claims FrameNet needs a manual download: the ``info`` and ``list``
    # subcommands report every dataset, FrameNet included, as downloaded
    # automatically, and ``framenet`` is an accepted ``--dataset`` choice.

list_datasets() -> None

List available datasets for download.

Shows all supported datasets with their versions and download status.

Source code in src/glazing/cli/download.py
@download.command(name="list")
def list_datasets() -> None:
    """List available datasets for download.

    Shows all supported datasets with their versions and download status.
    """
    # The docstring above is what click shows for ``--help``, so its wording
    # is user-facing output rather than internal documentation.

    click.echo("Available datasets:")
    click.echo()

    # Human-readable spellings; identifiers without an entry are shown as-is.
    pretty_names = {
        "verbnet": "VerbNet",
        "propbank": "PropBank",
        "wordnet": "WordNet",
        "framenet": "FrameNet",
    }

    for name in get_available_datasets():
        label = pretty_names.get(name, name)
        try:
            meta = get_dataset_info(name)
            click.echo(f"  {label}:")
            click.echo(f"    Version: {meta['version']}")
            # Every listed dataset is reported with the same status string.
            click.echo(f"    Status:  {'Auto-download'}")
        except ValueError as exc:
            # A failed lookup is reported inline so the remaining datasets
            # are still listed.
            click.echo(f"  {label}: Error getting info - {exc}")
        # Blank separator line after each entry, successful or not.
        click.echo()