# Source code for kerb.embedding.utils.operations

"""Vector operations for embeddings."""

import math
from typing import List


def normalize_vector(vector: List[float]) -> List[float]:
    """Normalize a vector to unit length (L2 norm = 1).

    A new list is always returned; the input is never mutated and the
    result never aliases it.

    Args:
        vector (List[float]): Input vector

    Returns:
        List[float]: Normalized vector. An input whose magnitude is zero
            (including an empty vector) cannot be scaled, so an unchanged
            copy of it is returned instead.
    """
    magnitude = math.sqrt(sum(x * x for x in vector))
    if magnitude == 0:
        # Hand back a copy, not the caller's own list, so mutating the result
        # is as safe here as it is for the non-zero branch below.
        return list(vector)
    return [x / magnitude for x in vector]
def vector_magnitude(vector: List[float]) -> float:
    """Return the Euclidean length (L2 norm) of a vector.

    Args:
        vector (List[float]): Input vector

    Returns:
        float: Square root of the sum of the squared components; ``0.0``
            for an empty vector.
    """
    squared_components = [component * component for component in vector]
    return math.sqrt(sum(squared_components))
def mean_pooling(vectors: List[List[float]]) -> List[float]:
    """Average several vectors component-wise (their centroid).

    Handy for collapsing the embeddings of multiple texts into one.

    Args:
        vectors (List[List[float]]): List of vectors to average

    Returns:
        List[float]: Mean vector, or an empty list when ``vectors`` is empty

    Raises:
        ValueError: If any vector's length differs from the first vector's.

    Examples:
        from kerb.embedding import embed_batch

        # Average embeddings of multiple sentences
        sentences = ["First sentence.", "Second sentence.", "Third sentence."]
        embeddings = embed_batch(sentences)
        avg_embedding = mean_pooling(embeddings)
    """
    if not vectors:
        return []

    width = len(vectors[0])
    totals = [0.0] * width
    for row in vectors:
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        # Lengths are equal here, so zip pairs every running total with its
        # matching component.
        totals = [acc + component for acc, component in zip(totals, row)]

    count = len(vectors)
    return [total / count for total in totals]
def weighted_mean_pooling(
    vectors: List[List[float]], weights: List[float]
) -> List[float]:
    """Combine several vectors into one using per-vector weights.

    Args:
        vectors (List[List[float]]): List of vectors
        weights (List[float]): Weight for each vector (rescaled internally so
            that they sum to 1)

    Returns:
        List[float]: Weighted mean vector, or an empty list when either
            ``vectors`` or ``weights`` is empty

    Raises:
        ValueError: If the number of vectors and weights differ, if the
            weights sum to zero, or if any vector's length differs from the
            first vector's.

    Examples:
        from kerb.embedding import embed_batch

        embeddings = embed_batch(["important", "less important"])
        weighted_avg = weighted_mean_pooling(embeddings, weights=[0.8, 0.2])
    """
    if not (vectors and weights):
        return []

    if len(weights) != len(vectors):
        raise ValueError("Number of vectors and weights must match")

    weight_sum = sum(weights)
    if weight_sum == 0:
        raise ValueError("Total weight cannot be zero")

    # Rescale up front so each vector's share of the result sums to 1.
    shares = [raw_weight / weight_sum for raw_weight in weights]

    width = len(vectors[0])
    pooled = [0.0] * width
    for row, share in zip(vectors, shares):
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        pooled = [acc + component * share for acc, component in zip(pooled, row)]

    return pooled
def max_pooling(vectors: List[List[float]]) -> List[float]:
    """Take the element-wise maximum across several vectors.

    Args:
        vectors (List[List[float]]): List of vectors

    Returns:
        List[float]: Max-pooled vector (a new list), or an empty list when
            ``vectors`` is empty

    Raises:
        ValueError: If any vector's length differs from the first vector's.
    """
    if not vectors:
        return []

    first = vectors[0]
    width = len(first)
    # Seed with a copy of the first vector so the input is never modified.
    peaks = list(first)

    for row in vectors[1:]:
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        peaks = [max(current, candidate) for current, candidate in zip(peaks, row)]

    return peaks