"""Utility functions for backend processing."""

import html
import re

# A complete reasoning block: opening tag, any text (including newlines), and
# the first closing tag that follows. Tag matching is case-insensitive.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)

# A lone closing tag, left behind when the opening tag was cut off or missing.
_THINK_CLOSE_RE = re.compile(r"</think>", re.IGNORECASE)

_REASONING_HEADER = "Reasoning:"


def clean_model_output(content: str) -> str:
    """Clean LLM output by removing reasoning artifacts.

    Processing steps, in order:

    1. HTML entities are unescaped, so ``&lt;think&gt;`` becomes ``<think>``.
       Note that this unescapes *every* entity in the text, not only tags.
    2. Complete ``<think>...</think>`` blocks are removed (case-insensitive,
       spanning multiple lines).
    3. If a stray ``</think>`` remains (malformed or cut-off opening tag),
       only the text after the last closing tag is kept.
    4. If the remaining text starts with a ``Reasoning:`` header, everything
       up to and including the first blank line is dropped. A header with no
       blank line after it is left untouched.

    Args:
        content: Raw LLM response string.

    Returns:
        The cleaned response with surrounding whitespace stripped, or an
        empty string when ``content`` is empty or ``None``.
    """
    if not content:
        return ""

    # Unescape first so entity-encoded tags are caught by the patterns below.
    content = html.unescape(content)

    content = _THINK_BLOCK_RE.sub("", content)

    # With no closing tag present, split() returns the whole string as its
    # only element, so this is a no-op for well-formed output.
    content = _THINK_CLOSE_RE.split(content)[-1]

    # Strip before inspecting the header so leading blank lines cannot be
    # mistaken for the header/body separator.
    content = content.strip()
    if content.startswith(_REASONING_HEADER):
        _header, separator, body = content.partition("\n\n")
        if separator:
            content = body

    return content.strip()