# Source code for kerb.retrieval.formatting

"""Formatting utilities for search results.

This module provides functions for formatting and exporting search results.
"""

import json
from typing import List

from .structures import SearchResult


def format_results(
    results: List[SearchResult],
    format_style: str = "simple",
    include_metadata: bool = False,
) -> str:
    """Format search results for display.

    Args:
        results: Search results to format.
        format_style: "simple", "detailed", or "json". Any other value falls
            back to ``str(results)``.
        include_metadata: Whether to include document metadata. Honoured by
            the "detailed" style (only when the metadata is non-empty) and by
            the "json" style (always, even when empty); ignored by "simple".

    Returns:
        str: Formatted results, or "No results found." when ``results`` is
        empty.

    Example:
        >>> results = keyword_search("python", docs)
        >>> print(format_results(results, format_style="detailed"))
    """
    if not results:
        return "No results found."

    if format_style == "simple":
        preview_chars = 100
        lines = []
        for result in results:
            content = result.document.content
            # Only mark the preview with an ellipsis when text was really cut
            # off; short content must not look truncated.
            suffix = "..." if len(content) > preview_chars else ""
            lines.append(
                f"{result.rank}. [{result.score:.3f}] "
                f"{content[:preview_chars]}{suffix}"
            )
        return "\n".join(lines)

    if format_style == "detailed":
        lines = []
        for result in results:
            lines.append(
                f"--- Rank {result.rank} (Score: {result.score:.4f}, "
                f"Method: {result.method}) ---"
            )
            lines.append(f"Doc ID: {result.document.id}")
            lines.append(f"Content: {result.document.content}")
            if include_metadata and result.document.metadata:
                lines.append(f"Metadata: {result.document.metadata}")
            # Blank line separates consecutive results (and leaves a trailing
            # newline after the last one).
            lines.append("")
        return "\n".join(lines)

    if format_style == "json":
        data = []
        for result in results:
            item = {
                "rank": result.rank,
                "score": result.score,
                "method": result.method,
                "document": {
                    "id": result.document.id,
                    "content": result.document.content,
                },
            }
            if include_metadata:
                item["document"]["metadata"] = result.document.metadata
            data.append(item)
        return json.dumps(data, indent=2)

    # Unknown style: fall back to the plain repr of the result list.
    return str(results)
def results_to_context(
    results: List[SearchResult],
    separator: str = "\n\n---\n\n",
    include_source: bool = True,
) -> str:
    r"""Join retrieved documents into one context string for an LLM prompt.

    Args:
        results: Search results whose document contents are concatenated, in
            the order given.
        separator: Text placed between consecutive documents.
        include_source: When true, each document is preceded by a
            ``[Source: <document id>]`` line.

    Returns:
        str: The joined context, or an empty string when ``results`` is empty.

    Example:
        >>> results = hybrid_search(query, query_emb, docs, embeddings)
        >>> context = results_to_context(results)
        >>> prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    """
    if not results:
        return ""

    if include_source:
        chunks = [
            f"[Source: {hit.document.id}]\n{hit.document.content}"
            for hit in results
        ]
    else:
        chunks = [hit.document.content for hit in results]
    return separator.join(chunks)