Source code for kerb.testing.datasets

"""Dataset management for testing."""

import json
import random
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple


[docs] class TestDataset: """Dataset for evaluation testing."""
[docs] def __init__(self, name: str, examples: Optional[List[Dict[str, Any]]] = None): """Initialize dataset. Args: name: Dataset name examples: List of examples """ self.name = name self.examples = examples or []
[docs] def add_example( self, input: str, output: str, metadata: Optional[Dict] = None ) -> None: """Add an example to the dataset.""" self.examples.append( {"input": input, "output": output, "metadata": metadata or {}} )
[docs] def __len__(self) -> int: """Get dataset size.""" return len(self.examples)
[docs] def __getitem__(self, idx: int) -> Dict[str, Any]: """Get example by index.""" return self.examples[idx]
[docs] def __iter__(self): """Iterate over examples.""" return iter(self.examples)
[docs] def save(self, filepath: Path) -> None: """Save dataset to disk.""" with open(filepath, "w") as f: json.dump({"name": self.name, "examples": self.examples}, f, indent=2)
[docs] @classmethod def load(cls, filepath: Path) -> "TestDataset": """Load dataset from disk.""" with open(filepath) as f: data = json.load(f) return cls(name=data["name"], examples=data["examples"])
[docs] def create_dataset( name: str, examples: List[Tuple[str, str]], metadata: Optional[List[Dict]] = None ) -> TestDataset: """Create a test dataset. Args: name: Dataset name examples: List of (input, output) tuples metadata: Optional metadata for each example Returns: TestDataset instance """ dataset = TestDataset(name) for i, (input_text, output_text) in enumerate(examples): meta = metadata[i] if metadata and i < len(metadata) else {} dataset.add_example(input_text, output_text, meta) return dataset
[docs] def load_dataset(filepath: Path) -> TestDataset: """Load dataset from file. Args: filepath: Path to dataset file Returns: TestDataset instance """ return TestDataset.load(filepath)
def split_dataset( dataset: TestDataset, train_ratio: float = 0.8, val_ratio: float = 0.1, test_ratio: float = 0.1, seed: Optional[int] = None, ) -> Tuple[TestDataset, TestDataset, TestDataset]: """Split dataset into train/val/test sets. Args: dataset: Dataset to split train_ratio: Training set ratio val_ratio: Validation set ratio test_ratio: Test set ratio seed: Random seed Returns: Tuple of (train_dataset, val_dataset, test_dataset) """ assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6 examples = dataset.examples.copy() if seed is not None: random.seed(seed) random.shuffle(examples) n = len(examples) train_end = int(n * train_ratio) val_end = train_end + int(n * val_ratio) train_dataset = TestDataset(f"{dataset.name}_train", examples[:train_end]) val_dataset = TestDataset(f"{dataset.name}_val", examples[train_end:val_end]) test_dataset = TestDataset(f"{dataset.name}_test", examples[val_end:]) return train_dataset, val_dataset, test_dataset def augment_dataset( dataset: TestDataset, augmentation_fn: Callable[[str], List[str]], augmentation_factor: int = 2, ) -> TestDataset: """Augment dataset with variations. Args: dataset: Original dataset augmentation_fn: Function to generate augmented inputs augmentation_factor: Number of augmentations per example Returns: Augmented dataset """ augmented = TestDataset(f"{dataset.name}_augmented") for example in dataset: # Add original augmented.add_example( example["input"], example["output"], example.get("metadata") ) # Add augmentations augmented_inputs = augmentation_fn(example["input"]) for aug_input in augmented_inputs[:augmentation_factor]: augmented.add_example( aug_input, example["output"], {**example.get("metadata", {}), "augmented": True}, ) return augmented