# Source code for kerb.embedding.utils.operations

"""Vector operations for embeddings."""

import math
from typing import List



[docs]
def normalize_vector(vector: List[float]) -> List[float]:
    """Normalize a vector to unit length (L2 norm = 1).

    A vector whose magnitude is zero (including an empty vector) cannot be
    scaled to unit length, so its components are returned unchanged.

    Args:
        vector (List[float]): Input vector

    Returns:
        List[float]: A new list holding the normalized components. The
            input list is never returned directly, so mutating the result
            cannot affect the caller's vector.
    """
    magnitude = math.sqrt(sum(x * x for x in vector))
    if magnitude == 0:
        # Return a copy rather than the argument itself: every other path
        # yields a fresh list, and aliasing the input only in this branch
        # would let later in-place edits of the result leak into the input.
        return list(vector)
    return [x / magnitude for x in vector]




[docs]
def vector_magnitude(vector: List[float]) -> float:
    """Return the Euclidean length (L2 norm) of a vector.

    Computed as the square root of the sum of the squared components.

    Args:
        vector (List[float]): Input vector

    Returns:
        float: Vector magnitude (0.0 for an empty vector)
    """
    squared_total = sum(component * component for component in vector)
    return math.sqrt(squared_total)




[docs]
def mean_pooling(vectors: List[List[float]]) -> List[float]:
    """Average several vectors component-wise (their centroid).

    Handy for collapsing the embeddings of multiple texts into one.

    Args:
        vectors (List[List[float]]): Vectors to average; all must share the
            length of the first one

    Returns:
        List[float]: Mean vector, or an empty list when no vectors are given

    Raises:
        ValueError: If any vector's length differs from the first vector's

    Examples:
        from kerb.embedding import embed_batch
        # Average embeddings of multiple sentences
        sentences = ["First sentence.", "Second sentence.", "Third sentence."]
        embeddings = embed_batch(sentences)
        avg_embedding = mean_pooling(embeddings)
    """
    if not vectors:
        return []

    count = len(vectors)
    width = len(vectors[0])
    totals = [0.0] * width

    for row in vectors:
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        # Plain left-to-right accumulation per component, one row at a time.
        totals = [running + value for running, value in zip(totals, row)]

    return [total / count for total in totals]




[docs]
def weighted_mean_pooling(
    vectors: List[List[float]], weights: List[float]
) -> List[float]:
    """Combine several vectors into one using per-vector weights.

    The weights are rescaled so that they sum to 1 before being applied.

    Args:
        vectors (List[List[float]]): Vectors to combine; all must share the
            length of the first one
        weights (List[float]): One weight per vector (normalized internally)

    Returns:
        List[float]: Weighted mean vector, or an empty list when either
            ``vectors`` or ``weights`` is empty

    Raises:
        ValueError: If the number of vectors and weights differ, if the
            weights sum to zero, or if the vectors differ in length

    Examples:
        from kerb.embedding import embed_batch
        embeddings = embed_batch(["important", "less important"])
        weighted_avg = weighted_mean_pooling(embeddings, weights=[0.8, 0.2])
    """
    if not (vectors and weights):
        return []

    if len(weights) != len(vectors):
        raise ValueError("Number of vectors and weights must match")

    weight_sum = sum(weights)
    if weight_sum == 0:
        raise ValueError("Total weight cannot be zero")

    # Rescale up front so each vector's share of the result sums to 1.
    shares = [raw / weight_sum for raw in weights]

    width = len(vectors[0])
    blended = [0.0] * width

    for row, share in zip(vectors, shares):
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        blended = [
            running + value * share for running, value in zip(blended, row)
        ]

    return blended




[docs]
def max_pooling(vectors: List[List[float]]) -> List[float]:
    """Take the element-wise maximum across several vectors.

    Args:
        vectors (List[List[float]]): Vectors to pool; all must share the
            length of the first one

    Returns:
        List[float]: Vector whose i-th entry is the largest i-th entry seen,
            or an empty list when no vectors are given

    Raises:
        ValueError: If any vector's length differs from the first vector's
    """
    if not vectors:
        return []

    head, *rest = vectors
    width = len(head)
    # Seed with a copy of the first vector so the input is never modified.
    best = [*head]

    for row in rest:
        if len(row) != width:
            raise ValueError("All vectors must have same dimensions")
        best = [max(current, value) for current, value in zip(best, row)]

    return best