Skip to content

glazing.utils.cache

Caching utilities.

cache

Caching utilities for the glazing package.

This module provides caching mechanisms for query results and cross-reference resolution to improve performance when working with large linguistic datasets.

CLASS DESCRIPTION
LRUCache

Thread-safe Least Recently Used cache implementation.

TTLCache

Time-To-Live cache with automatic expiration.

QueryCache

Specialized cache for dataset queries.

PersistentCache

Optional file-based persistent cache.

FUNCTION DESCRIPTION
generate_cache_key

Generate a unique cache key from function arguments.

cached_method

Decorator for caching method results.

clear_all_caches

Clear all active caches in the application.

Notes

The caching system is designed to be thread-safe and can handle concurrent access from multiple threads. All caches can be disabled globally for testing or debugging purposes.

Classes

CacheBase()

Base class for all cache implementations.

Provides common interface and registration for cache management.

Initialize and register the cache.

METHOD DESCRIPTION
clear

Clear all entries from the cache.

is_enabled

Check if caching is enabled.

size

Get the number of entries in the cache.

Source code in src/glazing/utils/cache.py
def __init__(self) -> None:
    """Add this cache instance to the module-wide cache registry."""
    registry = _cache_registry
    registry.add(self)
Functions
clear() -> None

Clear all entries from the cache.

Source code in src/glazing/utils/cache.py
def clear(self) -> None:
    """Remove every entry from the cache.

    The base implementation stores nothing, so it has nothing to remove.
    """
    return None
is_enabled() -> bool

Check if caching is enabled.

Source code in src/glazing/utils/cache.py
def is_enabled(self) -> bool:
    """Report whether caching is currently switched on.

    Returns
    -------
    bool
        The current value of the module-level ``CACHING_ENABLED`` flag.
    """
    enabled = CACHING_ENABLED
    return enabled
size() -> int

Get the number of entries in the cache.

Source code in src/glazing/utils/cache.py
def size(self) -> int:
    """Return the number of entries held by the cache.

    Returns
    -------
    int
        Always ``0`` in this base implementation.
    """
    # The base class keeps no entries of its own.
    return 0

LRUCache(max_size: int = 128)

Bases: CacheBase

Thread-safe Least Recently Used cache.

PARAMETER DESCRIPTION
max_size

Maximum number of entries to store.

TYPE: int DEFAULT: 128

ATTRIBUTE DESCRIPTION
hits

Number of cache hits.

TYPE: int

misses

Number of cache misses.

TYPE: int

METHOD DESCRIPTION
get

Get a value from the cache.

put

Store a value in the cache.

clear

Clear all entries.

get_stats

Get cache statistics.

Examples:

>>> cache = LRUCache[str](max_size=100)
>>> cache.put("key1", "value1")
>>> value = cache.get("key1")
>>> print(cache.get_stats())

Initialize the LRU cache.

Source code in src/glazing/utils/cache.py
def __init__(self, max_size: int = 128) -> None:
    """Set up an empty LRU cache.

    Parameters
    ----------
    max_size : int
        Maximum number of entries to store.
    """
    super().__init__()
    # Hit/miss counters start at zero.
    self.misses = 0
    self.hits = 0
    # Re-entrant lock guarding the ordered storage below.
    self._lock = threading.RLock()
    self._cache: OrderedDict[Hashable, T] = OrderedDict()
    self.max_size = max_size
Functions
clear() -> None

Clear all entries from the cache.

Source code in src/glazing/utils/cache.py
def clear(self) -> None:
    """Drop every cached entry and reset the hit/miss counters."""
    with self._lock:
        self.hits = self.misses = 0
        self._cache.clear()
get(key: Hashable, default: T | None = None) -> T | None

Get a value from the cache.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: Hashable

default

Default value if key not found.

TYPE: T | None DEFAULT: None

RETURNS DESCRIPTION
T | None

The cached value or default.

Source code in src/glazing/utils/cache.py
def get(self, key: Hashable, default: T | None = None) -> T | None:
    """Look up ``key`` and mark it as most recently used on a hit.

    Parameters
    ----------
    key : Hashable
        The cache key.
    default : T | None
        Value returned when the key is absent or caching is disabled.

    Returns
    -------
    T | None
        The stored value on a hit, otherwise ``default``.
    """
    if self.is_enabled():
        with self._lock:
            if key not in self._cache:
                self.misses += 1
                return default
            self.hits += 1
            # A hit makes this entry the most recently used one.
            self._cache.move_to_end(key)
            return self._cache[key]
    # Caching is disabled: the counters are left untouched.
    return default
get_stats() -> dict[str, int | float]

Get cache statistics.

RETURNS DESCRIPTION
dict[str, int | float]

Cache statistics including hits, misses, and hit rate.

Source code in src/glazing/utils/cache.py
def get_stats(self) -> dict[str, int | float]:
    """Summarise how the cache has been used.

    Returns
    -------
    dict[str, int | float]
        Mapping with ``hits``, ``misses``, ``total_requests``, ``hit_rate``,
        ``size`` and ``max_size``. ``hit_rate`` is ``0.0`` when no lookups
        have been counted yet.
    """
    hit_count, miss_count = self.hits, self.misses
    requests = hit_count + miss_count

    stats: dict[str, int | float] = {
        "hits": hit_count,
        "misses": miss_count,
        "total_requests": requests,
    }
    # Guard against dividing by zero before any lookup has happened.
    stats["hit_rate"] = hit_count / requests if requests > 0 else 0.0
    stats["size"] = self.size()
    stats["max_size"] = self.max_size
    return stats
put(key: Hashable, value: T) -> None

Store a value in the cache.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: Hashable

value

The value to cache.

TYPE: T

Source code in src/glazing/utils/cache.py
def put(self, key: Hashable, value: T) -> None:
    """Insert or update an entry, evicting the oldest one when over capacity.

    Does nothing when caching is disabled.

    Parameters
    ----------
    key : Hashable
        The cache key.
    value : T
        The value to cache.
    """
    if not self.is_enabled():
        return

    with self._lock:
        already_present = key in self._cache
        self._cache[key] = value
        if already_present:
            # An update also counts as a use: move it to the recent end.
            self._cache.move_to_end(key)
            return
        # A new key may have pushed the cache past its limit.
        if len(self._cache) > self.max_size:
            self._cache.popitem(last=False)
size() -> int

Get the number of entries in the cache.

Source code in src/glazing/utils/cache.py
def size(self) -> int:
    """Return how many entries are currently stored."""
    with self._lock:
        entry_count = len(self._cache)
    return entry_count

PersistentCache(cache_dir: Path | str, serializer: str = 'json')

Bases: CacheBase

File-based persistent cache.

PARAMETER DESCRIPTION
cache_dir

Directory to store cache files.

TYPE: Path | str

serializer

Serialization method ("json" or "pickle").

TYPE: str DEFAULT: 'json'

METHOD DESCRIPTION
get

Get a value from persistent storage.

put

Store a value persistently.

Initialize the persistent cache.

Source code in src/glazing/utils/cache.py
def __init__(self, cache_dir: Path | str, serializer: str = "json") -> None:
    """Prepare the cache directory and remember the serializer.

    Parameters
    ----------
    cache_dir : Path | str
        Directory to store cache files; created (with parents) if missing.
    serializer : str
        Serialization method ("json" or "pickle").
    """
    super().__init__()
    self.cache_dir = directory = Path(cache_dir)
    directory.mkdir(parents=True, exist_ok=True)
    # Re-entrant lock used by the file operations of this cache.
    self._lock = threading.RLock()
    self.serializer = serializer
Functions
clear() -> None

Clear all cache files.

Source code in src/glazing/utils/cache.py
def clear(self) -> None:
    """Delete every regular file directly inside the cache directory.

    Sub-directories are left in place.
    """
    with self._lock:
        for entry in self.cache_dir.glob("*"):
            if not entry.is_file():
                continue
            entry.unlink()
get(key: str, default: T | None = None) -> T | None

Get a value from persistent storage.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: str

default

Default value if not found.

TYPE: T | None DEFAULT: None

RETURNS DESCRIPTION
T | None

The cached value or default.

Source code in src/glazing/utils/cache.py
def get(self, key: str, default: T | None = None) -> T | None:
    """Get a value from persistent storage.

    A cache file that cannot be read or decoded is treated as corrupted:
    it is deleted and ``default`` is returned.

    Parameters
    ----------
    key : str
        The cache key.
    default : T | None
        Default value if not found, unreadable, or caching is disabled.

    Returns
    -------
    T | None
        The cached value or default.
    """
    if not self.is_enabled():
        return default

    with self._lock:
        cache_path = self._get_cache_path(key)

        if not cache_path.exists():
            return default

        try:
            if self.serializer == "json":
                with cache_path.open("r") as f:
                    return json.load(f)  # type: ignore[no-any-return]
            else:
                # SECURITY: unpickling executes code embedded in the file.
                # Only point this cache at a directory that untrusted
                # parties cannot write to.
                with cache_path.open("rb") as f:
                    return pickle.load(f)  # type: ignore[no-any-return]  # noqa: S301
        except (
            json.JSONDecodeError,
            # Truncated or garbled pickle data surfaces as any of these,
            # not as ValueError/OSError.
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            OSError,
            ValueError,
        ):
            # Corrupted cache file
            cache_path.unlink(missing_ok=True)
            return default
put(key: str, value: T) -> None

Store a value persistently.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: str

value

The value to cache.

TYPE: T

Source code in src/glazing/utils/cache.py
def put(self, key: str, value: T) -> None:
    """Store a value persistently.

    Writing is best effort: if the value cannot be serialized or the file
    cannot be written, any partial file is removed and the call returns
    without raising. Does nothing when caching is disabled.

    Parameters
    ----------
    key : str
        The cache key.
    value : T
        The value to cache.
    """
    if not self.is_enabled():
        return

    with self._lock:
        cache_path = self._get_cache_path(key)

        try:
            if self.serializer == "json":
                with cache_path.open("w") as f:
                    json.dump(value, f)
            else:
                with cache_path.open("wb") as f:
                    pickle.dump(value, f)
        except (OSError, TypeError, ValueError, pickle.PicklingError):
            # Failed to write cache. ValueError covers json's circular
            # reference error; PicklingError covers unpicklable objects.
            # The file was already opened for writing, so drop the partial
            # output rather than leave a corrupt entry behind.
            cache_path.unlink(missing_ok=True)
size() -> int

Get the number of cache files.

Source code in src/glazing/utils/cache.py
def size(self) -> int:
    """Get the number of cache files.

    Only regular files directly inside the cache directory are counted,
    matching what ``clear`` removes; sub-directories are ignored.

    Returns
    -------
    int
        Number of regular files in the cache directory.
    """
    with self._lock:
        return sum(1 for entry in self.cache_dir.glob("*") if entry.is_file())

QueryCache(max_size: int = 256, ttl: float = 600.0)

Bases: CacheBase

Specialized cache for dataset queries.

Combines LRU and TTL strategies for optimal query caching.

PARAMETER DESCRIPTION
max_size

Maximum number of cached queries.

TYPE: int DEFAULT: 256

ttl

Time-to-live for query results, in seconds.

TYPE: float DEFAULT: 600.0

METHOD DESCRIPTION
get_query_result

Get cached query result.

cache_query_result

Cache a query result.

invalidate_query_type

Invalidate all queries of a specific type.

Initialize the query cache.

Source code in src/glazing/utils/cache.py
def __init__(self, max_size: int = 256, ttl: float = 600.0) -> None:
    """Create the two underlying caches used for query results.

    Parameters
    ----------
    max_size : int
        Capacity given to both the LRU cache and the TTL cache.
    ttl : float
        Default time-to-live passed to the TTL cache.
    """
    super().__init__()
    capacity = max_size
    self._lru_cache = LRUCache[CacheValue](max_size=capacity)
    self._ttl_cache = TTLCache[CacheValue](max_size=capacity, ttl=ttl)
Functions
cache_query_result(query_type: str, params: QueryParams, result: CacheValue, ttl: float | None = None) -> None

Cache a query result.

PARAMETER DESCRIPTION
query_type

Type of query.

TYPE: str

params

Query parameters.

TYPE: QueryParams

result

Query result to cache.

TYPE: CacheValue

ttl

Custom TTL for this result.

TYPE: float | None DEFAULT: None

Source code in src/glazing/utils/cache.py
def cache_query_result(
    self, query_type: str, params: QueryParams, result: CacheValue, ttl: float | None = None
) -> None:
    """Record a query result in both underlying caches.

    Parameters
    ----------
    query_type : str
        Type of query.
    params : QueryParams
        Query parameters.
    result : CacheValue
        Query result to cache.
    ttl : float | None
        Custom TTL for this result, forwarded to the TTL cache.
    """
    cache_key = self._generate_query_key(query_type, params)

    # The same key is written to the LRU cache and to the TTL cache.
    self._lru_cache.put(cache_key, result)
    self._ttl_cache.put(cache_key, result, ttl)
clear() -> None

Clear all cached queries.

Source code in src/glazing/utils/cache.py
def clear(self) -> None:
    """Empty both underlying caches (LRU first, then TTL)."""
    for inner in (self._lru_cache, self._ttl_cache):
        inner.clear()
get_query_result(query_type: str, params: QueryParams) -> CacheValue | None

Get cached query result.

PARAMETER DESCRIPTION
query_type

Type of query (e.g., "frame_by_lemma").

TYPE: str

params

Query parameters.

TYPE: QueryParams

RETURNS DESCRIPTION
CacheValue | None

Cached result or None.

Source code in src/glazing/utils/cache.py
def get_query_result(self, query_type: str, params: QueryParams) -> CacheValue | None:
    """Look up a previously cached query result.

    The TTL cache is consulted first; if it yields ``None`` the LRU cache is
    asked for the same key.

    Parameters
    ----------
    query_type : str
        Type of query (e.g., "frame_by_lemma").
    params : QueryParams
        Query parameters.

    Returns
    -------
    CacheValue | None
        Cached result or None.
    """
    cache_key = self._generate_query_key(query_type, params)

    fresh = self._ttl_cache.get(cache_key)
    # NOTE(review): an entry that has expired from the TTL cache can still be
    # served by the LRU fallback below — confirm this is intended.
    return fresh if fresh is not None else self._lru_cache.get(cache_key)
get_stats() -> dict[str, int | float | dict[str, int | float]]

Get combined cache statistics.

Source code in src/glazing/utils/cache.py
def get_stats(self) -> dict[str, int | float | dict[str, int | float]]:
    """Collect statistics from both underlying caches.

    Returns
    -------
    dict[str, int | float | dict[str, int | float]]
        ``lru_stats`` (the LRU cache's own statistics), ``ttl_size`` (entry
        count of the TTL cache) and ``total_size`` (this cache's ``size()``).
    """
    combined: dict[str, int | float | dict[str, int | float]] = {}
    combined["lru_stats"] = self._lru_cache.get_stats()
    combined["ttl_size"] = self._ttl_cache.size()
    combined["total_size"] = self.size()
    return combined
invalidate_query_type(query_type: str) -> None

Invalidate all queries of a specific type.

PARAMETER DESCRIPTION
query_type

Type of query to invalidate.

TYPE: str

Source code in src/glazing/utils/cache.py
def invalidate_query_type(self, query_type: str) -> None:
    """Invalidate all queries of a specific type (not implemented yet).

    Parameters
    ----------
    query_type : str
        Type of query to invalidate.

    Raises
    ------
    NotImplementedError
        Always; selective invalidation is not available.
    """
    raise NotImplementedError(
        f"Query type '{query_type}' invalidation not yet implemented. "
        "This method requires implementing key categorization by query type to "
        "selectively invalidate cached queries."
    )
size() -> int

Get total number of cached entries.

Source code in src/glazing/utils/cache.py
def size(self) -> int:
    """Return the combined entry count of the LRU and TTL caches.

    Returns
    -------
    int
        Sum of the two underlying caches' sizes.
    """
    return sum(inner.size() for inner in (self._lru_cache, self._ttl_cache))

TTLCache(max_size: int = 128, ttl: float = 300.0)

Bases: CacheBase

Time-To-Live cache with automatic expiration.

PARAMETER DESCRIPTION
max_size

Maximum number of entries.

TYPE: int DEFAULT: 128

ttl

Time-to-live in seconds for each entry.

TYPE: float DEFAULT: 300.0

METHOD DESCRIPTION
get

Get a value if not expired.

put

Store a value with optional custom TTL.

cleanup

Remove expired entries.

Examples:

>>> cache = TTLCache[str](max_size=100, ttl=60.0)
>>> cache.put("key1", "value1")
>>> time.sleep(61)
>>> cache.get("key1")  # Returns None (expired)

Initialize the TTL cache.

Source code in src/glazing/utils/cache.py
def __init__(self, max_size: int = 128, ttl: float = 300.0) -> None:
    """Set up an empty TTL cache.

    Parameters
    ----------
    max_size : int
        Maximum number of entries.
    ttl : float
        Default time-to-live in seconds for each entry.
    """
    super().__init__()
    # Re-entrant lock guarding the storage below.
    self._lock = threading.RLock()
    # Each key maps to a (value, expiry timestamp) pair.
    self._cache: dict[Hashable, tuple[T, float]] = {}
    self.default_ttl = ttl
    self.max_size = max_size
Functions
cleanup() -> int

Remove expired entries.

RETURNS DESCRIPTION
int

Number of entries removed.

Source code in src/glazing/utils/cache.py
def cleanup(self) -> int:
    """Purge every entry whose expiry time has been reached.

    Returns
    -------
    int
        Number of entries removed.
    """
    with self._lock:
        now = time.time()
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        stale = []
        for key, (_, expiry) in self._cache.items():
            if now >= expiry:
                stale.append(key)
        for key in stale:
            del self._cache[key]
        return len(stale)
clear() -> None

Clear all entries.

Source code in src/glazing/utils/cache.py
def clear(self) -> None:
    """Discard every stored entry, expired or not."""
    storage = self._cache
    with self._lock:
        storage.clear()
get(key: Hashable, default: T | None = None) -> T | None

Get a value if not expired.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: Hashable

default

Default value if key not found or expired.

TYPE: T | None DEFAULT: None

RETURNS DESCRIPTION
T | None

The cached value or default.

Source code in src/glazing/utils/cache.py
def get(self, key: Hashable, default: T | None = None) -> T | None:
    """Return the value stored for ``key`` unless it has expired.

    An expired entry is removed as a side effect of the lookup.

    Parameters
    ----------
    key : Hashable
        The cache key.
    default : T | None
        Value returned when the key is absent, expired, or caching is
        disabled.

    Returns
    -------
    T | None
        The cached value or default.
    """
    if not self.is_enabled():
        return default

    with self._lock:
        try:
            value, expiry = self._cache[key]
        except KeyError:
            return default
        if time.time() < expiry:
            return value
        # Past its expiry time: drop it and report a miss.
        del self._cache[key]
        return default
put(key: Hashable, value: T, ttl: float | None = None) -> None

Store a value with TTL.

PARAMETER DESCRIPTION
key

The cache key.

TYPE: Hashable

value

The value to cache.

TYPE: T

ttl

Custom TTL in seconds, or use default.

TYPE: float | None DEFAULT: None

Source code in src/glazing/utils/cache.py
def put(self, key: Hashable, value: T, ttl: float | None = None) -> None:
    """Store ``value`` under ``key`` together with its expiry time.

    Does nothing when caching is disabled.

    Parameters
    ----------
    key : Hashable
        The cache key.
    value : T
        The value to cache.
    ttl : float | None
        Custom TTL in seconds; the cache's default TTL is used when None.
    """
    if not self.is_enabled():
        return

    lifetime = self.default_ttl if ttl is None else ttl
    expires_at = time.time() + lifetime

    with self._lock:
        self._cache[key] = (value, expires_at)

        # Going over capacity triggers a single eviction.
        if len(self._cache) > self.max_size:
            self._evict_oldest()
size() -> int

Get the number of entries.

Source code in src/glazing/utils/cache.py
def size(self) -> int:
    """Return the number of entries left after purging expired ones."""
    with self._lock:
        # Expired entries are removed first so they are not counted.
        self.cleanup()
        remaining = len(self._cache)
    return remaining

Functions

cached_method(cache: CacheBase | None = None, ttl: float | None = None) -> Callable[[Callable[..., T]], Callable[..., T]]

Decorator for caching method results.

PARAMETER DESCRIPTION
cache

Cache instance to use. If None, a TTLCache is created when ttl is given; otherwise an LRUCache is created.

TYPE: CacheBase | None DEFAULT: None

ttl

Time-to-live for cached results.

TYPE: float | None DEFAULT: None

RETURNS DESCRIPTION
Callable[[Callable[..., T]], Callable[..., T]]

Decorator that adds caching to a function.

Examples:

>>> @cached_method(ttl=60.0)
... def expensive_function(x: int) -> int:
...     return x ** 2
Source code in src/glazing/utils/cache.py
def cached_method(
    cache: CacheBase | None = None, ttl: float | None = None
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator for caching method results.

    Results are stored under a key derived from the decorated function's
    module and qualified name plus the call arguments, so several functions
    can share one cache instance without reading each other's entries.

    Parameters
    ----------
    cache : CacheBase | None
        Cache instance to use. If None, a ``TTLCache`` is created when
        ``ttl`` is given, otherwise an ``LRUCache``.
    ttl : float | None
        Time-to-live for cached results. Only applied when the cache in use
        is a ``TTLCache``.

    Returns
    -------
    Callable[[Callable[..., T]], Callable[..., T]]
        Decorator that adds caching to a function. The wrapper exposes the
        cache in use as its ``cache`` attribute.

    Examples
    --------
    >>> @cached_method(ttl=60.0)
    ... def expensive_function(x: int) -> int:
    ...     return x ** 2
    """
    # Private marker that tells "nothing cached" apart from a cached ``None``.
    missing = object()

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Resolve the cache into a local instead of rebinding the enclosing
        # ``cache`` variable: reusing one decorator object on several
        # functions then gives each function its own default cache.
        active_cache = cache
        if active_cache is None:
            active_cache = TTLCache(ttl=ttl) if ttl is not None else LRUCache()

        # Namespace keys by function so that functions sharing an explicitly
        # supplied cache cannot collide on equal arguments.
        func_module = getattr(func, "__module__", "")
        func_name = getattr(func, "__qualname__", repr(func))
        func_id = f"{func_module}.{func_name}"

        # Only the built-in caches are known to accept a default in ``get``.
        accepts_default = isinstance(active_cache, TTLCache | LRUCache)

        @wraps(func)
        def wrapper(*args, **kwargs) -> T:  # type: ignore[no-untyped-def]
            # Generate cache key
            key = generate_cache_key(func_id, *args, **kwargs)

            # Check cache
            if accepts_default:
                cached = active_cache.get(key, missing)
            elif hasattr(active_cache, "get"):
                # Unknown cache type: keep the one-argument call and treat a
                # ``None`` answer as a miss, as before.
                cached = active_cache.get(key)
                if cached is None:
                    cached = missing
            else:
                cached = missing

            if cached is not missing:
                return cached  # type: ignore[no-any-return]

            # Compute result
            result = func(*args, **kwargs)

            # Store in cache
            if isinstance(active_cache, TTLCache):
                active_cache.put(key, result, ttl)
            elif hasattr(active_cache, "put"):
                active_cache.put(key, result)

            return result

        # Attach cache for inspection
        wrapper.cache = active_cache  # type: ignore[attr-defined]
        return wrapper

    return decorator

clear_all_caches() -> int

Clear all active caches in the application.

RETURNS DESCRIPTION
int

Number of caches cleared.

Source code in src/glazing/utils/cache.py
def clear_all_caches() -> int:
    """Call ``clear()`` on every cache in the registry.

    Returns
    -------
    int
        Number of caches cleared; a cache whose ``clear()`` raises
        ``OSError`` or ``ValueError`` is skipped and not counted.
    """
    cleared = 0
    for registered in _cache_registry:
        try:
            registered.clear()
        except (OSError, ValueError):
            # Best effort: move on to the next cache.
            continue
        cleared += 1
    return cleared

generate_cache_key(*args, **kwargs) -> str

Generate a unique cache key from function arguments.

PARAMETER DESCRIPTION
*args

Positional arguments.

DEFAULT: ()

**kwargs

Keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION
str

A unique cache key.

Source code in src/glazing/utils/cache.py
def generate_cache_key(*args, **kwargs) -> str:  # type: ignore[no-untyped-def]
    """Generate a unique cache key from function arguments.

    The arguments are serialized to JSON (keyword arguments sorted by name,
    non-JSON values converted with ``str``) and hashed. Arguments that JSON
    cannot encode at all, such as dicts with tuple keys, dicts with
    mixed-type keys, or circular structures, fall back to ``repr`` rather
    than raising.

    Parameters
    ----------
    *args
        Positional arguments.
    **kwargs
        Keyword arguments.

    Returns
    -------
    str
        A unique cache key (32-character hexadecimal MD5 digest).
    """
    key_data = {"args": args, "kwargs": kwargs}
    try:
        key_str = json.dumps(key_data, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # ``default=str`` is not applied to dict keys, and ``sort_keys``
        # cannot order mixed-type keys. Keyword names are unique strings, so
        # sorting the items is always safe.
        key_str = repr((args, sorted(kwargs.items())))
    # MD5 is used only as a compact fingerprint, never for security;
    # saying so keeps the call usable on FIPS-restricted Python builds.
    return hashlib.md5(key_str.encode(), usedforsecurity=False).hexdigest()  # noqa: S324

set_caching_enabled(enabled: bool) -> None

Enable or disable all caching globally.

PARAMETER DESCRIPTION
enabled

Whether caching should be enabled.

TYPE: bool

Source code in src/glazing/utils/cache.py
def set_caching_enabled(enabled: bool) -> None:
    """Switch caching on or off for the whole module.

    Turning caching off also clears all registered caches.

    Parameters
    ----------
    enabled : bool
        Whether caching should be enabled.
    """
    global CACHING_ENABLED
    CACHING_ENABLED = enabled

    if enabled:
        return
    # Caching was just disabled: drop whatever is currently cached.
    clear_all_caches()