"""Utility functions for backend processing."""
import html
import re
def clean_model_output(content: str) -> str:
    """
    Clean LLM output by removing reasoning artifacts.

    Removes:
    1. <think>...</think> blocks emitted by reasoning models (e.g. DeepSeek)
    2. HTML-escaped variants of those tags (&lt;think&gt;), by unescaping
       the text before any tag matching is done
    3. A leading "Reasoning:" header paragraph
    4. Malformed/cut-off tags: everything up to and including a stray
       closing </think> that has no matching opening tag

    Note that HTML unescaping is applied to the whole string, so any other
    entities in the response (e.g. ``&amp;``) are decoded as well.

    Args:
        content: Raw LLM response string

    Returns:
        Cleaned response string with surrounding whitespace stripped, or an
        empty string when ``content`` is empty.
    """
    if not content:
        return ""

    # 1. Unescape HTML entities first so escaped tags are caught by the
    #    same patterns as literal ones.
    text = html.unescape(content)

    # 2. Remove complete <think>...</think> blocks. DOTALL lets the block
    #    span multiple lines; the non-greedy match keeps separate blocks
    #    from swallowing the text between them.
    text = re.sub(
        r'<think>.*?</think>',
        '',
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )

    # 3. Handle cut-off/malformed output: a closing tag that is still
    #    present has no opening partner, so keep only what follows the
    #    last one. re.split returns the whole string as a single element
    #    when there is no match, so no separate membership test is needed.
    #    Case-insensitive to stay consistent with step 2.
    text = re.split(r'</think>', text, flags=re.IGNORECASE)[-1]

    # 4. Remove a leading "Reasoning:" header paragraph. Strip *before*
    #    splitting: removed blocks usually leave leading blank lines, and
    #    splitting the unstripped text on "\n\n" would peel off that empty
    #    prefix instead of the header.
    text = text.strip()
    if text.startswith("Reasoning:"):
        _header, separator, remainder = text.partition("\n\n")
        # Only drop the header when something follows it; a response that
        # is nothing but the header paragraph is returned unchanged.
        if separator:
            text = remainder

    return text.strip()