"""Cache utility functions.
This module provides helper functions for working with caches:
- create_memory_cache: Convenience factory for MemoryCache
- create_disk_cache: Convenience factory for DiskCache
- create_tiered_cache: Convenience factory for TieredCache
- create_llm_cache: Convenience factory for LLMCache
- invalidate_expired_entries: Clean up expired cache entries
- export_cache_stats: Export cache statistics in various formats
- estimate_cache_size: Estimate cache size in different units
"""
import json
from pathlib import Path
from typing import TYPE_CHECKING, Dict, Optional, Union
from .backends import DiskCache, LLMCache, MemoryCache, TieredCache
from .types import BaseCache
if TYPE_CHECKING:
from kerb.core.enums import ExportFormat, SizeUnit
[docs]
def create_memory_cache(
    max_size: int = 1000, default_ttl: Optional[float] = None
) -> MemoryCache:
    """Build a :class:`MemoryCache` from the given settings.

    Both arguments are forwarded unchanged, by keyword, to the
    ``MemoryCache`` constructor.

    Args:
        max_size: Maximum number of entries.
        default_ttl: Default TTL in seconds.

    Returns:
        MemoryCache: The newly constructed memory cache.

    Example:
        >>> cache = create_memory_cache(max_size=100, default_ttl=3600)
    """
    settings = {"max_size": max_size, "default_ttl": default_ttl}
    return MemoryCache(**settings)
[docs]
def create_disk_cache(
    cache_dir: str = ".cache",
    max_size: Optional[int] = None,
    default_ttl: Optional[float] = None,
    serializer: str = "pickle",
) -> DiskCache:
    """Build a :class:`DiskCache` from the given settings.

    Every argument is forwarded unchanged, by keyword, to the
    ``DiskCache`` constructor.

    Args:
        cache_dir: Directory to store cache files.
        max_size: Maximum number of entries.
        default_ttl: Default TTL in seconds.
        serializer: Serialization format ('pickle' or 'json').

    Returns:
        DiskCache: The newly constructed disk cache.

    Example:
        >>> cache = create_disk_cache(cache_dir=".cache/llm", serializer="json")
    """
    settings = {
        "cache_dir": cache_dir,
        "max_size": max_size,
        "default_ttl": default_ttl,
        "serializer": serializer,
    }
    return DiskCache(**settings)
[docs]
def create_tiered_cache(
    memory_max_size: int = 100,
    disk_cache_dir: str = ".cache",
    disk_max_size: Optional[int] = None,
    default_ttl: Optional[float] = None,
) -> TieredCache:
    """Build a :class:`TieredCache` (memory + disk) from the given settings.

    Every argument is forwarded unchanged, by keyword, to the
    ``TieredCache`` constructor.

    Args:
        memory_max_size: Maximum entries in memory cache.
        disk_cache_dir: Directory for disk cache.
        disk_max_size: Maximum entries in disk cache.
        default_ttl: Default TTL in seconds.

    Returns:
        TieredCache: The newly constructed tiered cache.

    Example:
        >>> cache = create_tiered_cache(
        ...     memory_max_size=50,
        ...     disk_cache_dir=".cache/llm"
        ... )
    """
    settings = {
        "memory_max_size": memory_max_size,
        "disk_cache_dir": disk_cache_dir,
        "disk_max_size": disk_max_size,
        "default_ttl": default_ttl,
    }
    return TieredCache(**settings)
[docs]
def create_llm_cache(
    backend: Optional[BaseCache] = None,
    cost_per_token: float = 0.00001,
    avg_tokens_per_request: int = 1000,
    avg_response_time: float = 2.0,
) -> LLMCache:
    """Build an :class:`LLMCache` from the given settings.

    Every argument is forwarded unchanged, by keyword, to the
    ``LLMCache`` constructor.

    Args:
        backend: Cache backend to use.
        cost_per_token: Cost per token for tracking.
        avg_tokens_per_request: Average tokens per request.
        avg_response_time: Average response time in seconds.

    Returns:
        LLMCache: The newly constructed LLM cache.

    Example:
        >>> cache = create_llm_cache(
        ...     backend=create_tiered_cache(),
        ...     cost_per_token=0.00002
        ... )
    """
    settings = {
        "backend": backend,
        "cost_per_token": cost_per_token,
        "avg_tokens_per_request": avg_tokens_per_request,
        "avg_response_time": avg_response_time,
    }
    return LLMCache(**settings)
[docs]
def invalidate_expired_entries(cache: BaseCache) -> int:
    """Purge expired entries from a cache on demand.

    Behaviour depends on the concrete cache type:

    * ``DiskCache``: the cache's own ``_clean_expired()`` is invoked and
      ``0`` is returned, because no removal count is collected here.
    * ``MemoryCache``: every entry whose ``is_expired()`` is true is
      deleted and the number of deleted keys is returned.
    * Any other cache type: nothing is done and ``0`` is returned.

    Args:
        cache: Cache to clean.

    Returns:
        int: Number of entries invalidated (always ``0`` for ``DiskCache``
        and for unsupported cache types).

    Example:
        >>> count = invalidate_expired_entries(cache)
        >>> print(f"Removed {count} expired entries")
    """
    if isinstance(cache, DiskCache):
        # The disk backend performs its own sweep; the count is not reported.
        cache._clean_expired()
        return 0

    if isinstance(cache, MemoryCache):
        # Snapshot the stale keys first so the mapping is not mutated
        # while it is being iterated.
        stale = [name for name, item in cache._cache.items() if item.is_expired()]
        for name in stale:
            cache.delete(name)
        return len(stale)

    return 0
[docs]
def export_cache_stats(
    cache: Union[BaseCache, LLMCache], format: Union["ExportFormat", str] = "dict"
) -> Union[Dict, str]:
    """Export a cache's statistics as a dict or a JSON string.

    Statistics are gathered as follows:

    * ``LLMCache``, ``MemoryCache`` and ``DiskCache``: ``cache.get_stats()``.
    * ``TieredCache``: ``cache.get_stats()`` is indexed by ``"memory"`` and
      ``"disk"``, and the result is a dict holding both tiers' statistics
      under those keys.
    * Anything else: the ``cache.stats`` attribute.

    Args:
        cache: Cache to export stats from.
        format: Output format (ExportFormat enum or string: 'dict', 'json',
            'csv', 'table'). The value is validated with
            ``validate_enum_or_string``.
            NOTE(review): only 'json' is rendered as a string by this
            function; every other accepted format yields the dict form.

    Returns:
        Union[Dict, str]: A JSON string (indented by two spaces) when the
        format is 'json', otherwise the statistics dict.

    Examples:
        >>> from kerb.core.enums import ExportFormat
        >>> stats = export_cache_stats(cache, format=ExportFormat.JSON)
        >>> print(stats)
    """
    from kerb.core.enums import ExportFormat, validate_enum_or_string

    # Reduce the enum-or-string argument to its plain string value.
    checked = validate_enum_or_string(format, ExportFormat, "format")
    format_name = checked.value if isinstance(checked, ExportFormat) else checked
    as_json = format_name == "json"

    if isinstance(cache, (LLMCache, MemoryCache, DiskCache)):
        payload = cache.get_stats().to_dict()
    elif isinstance(cache, TieredCache):
        # Tiered caches expose one stats object per tier; export both.
        per_tier = cache.get_stats()
        payload = {
            "memory": per_tier["memory"].to_dict(),
            "disk": per_tier["disk"].to_dict(),
        }
    else:
        payload = cache.stats.to_dict()

    if as_json:
        return json.dumps(payload, indent=2)
    return payload
[docs]
def estimate_cache_size(
    cache: BaseCache, unit: Union["SizeUnit", str] = "entries"
) -> Union[int, float, str]:
    """Estimate cache size in various units.

    Byte-based units are only computed for ``DiskCache`` instances, by
    summing the on-disk size of the file behind each key. For every other
    cache type, and for any unit this function does not recognise, the
    entry count from ``cache.size()`` is returned instead.

    Args:
        cache: Cache to measure.
        unit: Unit to measure in (SizeUnit enum or string: 'entries',
            'bytes', 'kb', 'mb', 'gb', 'human'). The value is validated
            with ``validate_enum_or_string``.

    Returns:
        Union[int, float, str]: ``int`` for 'entries' and 'bytes', ``float``
        for 'kb', 'mb' and 'gb' (1024-based), and a formatted string such
        as ``"1.50 MB"`` for 'human'.

    Examples:
        >>> from kerb.core.enums import SizeUnit
        >>> size = estimate_cache_size(cache, unit=SizeUnit.MB)
        >>> print(f"Cache size: {size}")
    """
    from kerb.core.enums import SizeUnit, validate_enum_or_string

    # Reduce the enum-or-string argument to its plain string value.
    unit_val = validate_enum_or_string(unit, SizeUnit, "unit")
    unit_str = unit_val.value if isinstance(unit_val, SizeUnit) else unit_val

    # Entry counts need no disk access; non-disk caches have no byte
    # measurement available here, so they also report their entry count.
    if unit_str == "entries" or not isinstance(cache, DiskCache):
        return cache.size()

    total_bytes = 0
    for key in cache.keys():
        cache_path = cache._get_cache_path(key)
        try:
            # Stat directly instead of exists()+stat(): one syscall, and no
            # window in which the file can disappear between the two calls.
            total_bytes += cache_path.stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            # The entry has no backing file (any more); skip it.
            continue

    if unit_str == "bytes":
        return total_bytes

    divisors = {
        "kb": 1024.0,
        "mb": 1024.0 * 1024.0,
        "gb": 1024.0 * 1024.0 * 1024.0,
    }
    if unit_str in divisors:
        return total_bytes / divisors[unit_str]

    if unit_str == "human":
        # Scale down by 1024 until the value fits the current suffix.
        scaled = total_bytes
        for suffix in ("B", "KB", "MB", "GB"):
            if scaled < 1024.0:
                return f"{scaled:.2f} {suffix}"
            scaled /= 1024.0
        return f"{scaled:.2f} TB"

    # Unrecognised unit: fall back to the entry count.
    return cache.size()