Source code for kerb.parsing.utilities

"""Utility functions for parsing.

This module provides general utility functions for cleaning and
preprocessing LLM outputs.
"""

import re


# Fenced markdown block: opening ```, optional language tag, payload (group 1),
# closing ```. DOTALL lets the payload span multiple lines.
_CODE_FENCE = re.compile(r"```(?:\w+)?\n?(.*?)\n?```", re.DOTALL)

# Optional leading article. The trailing \b stops it from consuming the first
# letters of a longer word ("an" -> "n", "another" -> "nother", "theory" -> "ory").
_ARTICLE = r"(?:(?:the|an|a)\b)?"

# Conversational lead-ins, applied in this order. MULTILINE makes ^ match at the
# start of every line, so a lead-in opening any line is removed, not only the
# one at the very beginning of the text.
_PREFIX_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE | re.MULTILINE)
    for pattern in (
        r"^Sure,?\s+here(?:\'s| is)\s+" + _ARTICLE + r"\s*",
        r"^Here(?:\'s| is)\s+" + _ARTICLE + r"\s*",
        r"^(?:OK|Okay),?\s+",
    )
)


def clean_llm_output(text: str) -> str:
    """Clean common artifacts from LLM outputs.

    Processing steps, in order:

    1. Markdown code fences (``` with an optional language tag) are removed;
       the fenced content itself is kept.
    2. Leading/trailing whitespace is stripped.
    3. Common lead-ins such as "Here is the ...", "Sure, here's a ..." and
       "OK, " are removed (case-insensitively) wherever they start a line.
    4. Whitespace is stripped again.

    Args:
        text (str): Raw LLM output

    Returns:
        str: Cleaned text
    """
    # Unwrap fenced blocks first so a lead-in hidden behind a fence is exposed.
    text = _CODE_FENCE.sub(r"\1", text)
    text = text.strip()

    for prefix_re in _PREFIX_PATTERNS:
        text = prefix_re.sub("", text)

    return text.strip()