Spaces:
Sleeping
Sleeping
File size: 1,113 Bytes
dccc925 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
"""Utility functions for backend processing."""
import html
import re
def clean_model_output(content: str) -> str:
    """
    Clean LLM output by removing reasoning artifacts.

    Removes:
        1. Complete <think>...</think> blocks (any letter case, multiline).
        2. HTML-escaped variants of those tags (e.g. &lt;think&gt;), by
           unescaping HTML entities before any tag matching.
        3. Everything up to and including an orphan closing </think> tag
           (a cut-off block whose opening tag is missing), in any letter case.
        4. A leading "Reasoning:" paragraph, when it is followed by a blank
           line and further text.

    Args:
        content: Raw LLM response string.

    Returns:
        Cleaned response string with surrounding whitespace stripped.
        An empty string is returned for empty/falsy input.
    """
    if not content:
        return ""

    # Unescape first so entity-encoded tags are matched by the patterns below.
    content = html.unescape(content)

    # Drop well-formed reasoning blocks; DOTALL lets '.' span newlines.
    content = re.sub(
        r'<think>.*?</think>',
        '',
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )

    # Any closing tag still present has no matching opener: keep only the text
    # after the last one. Matched case-insensitively, consistent with the
    # block removal above. With no match, re.split returns the text unchanged.
    content = re.split(r'</think>', content, flags=re.IGNORECASE)[-1]

    # Strip before looking for the header: removing a <think> block usually
    # leaves leading blank lines, and splitting the unstripped text on "\n\n"
    # would cut at those blank lines instead of after the header paragraph.
    stripped = content.strip()
    if stripped.startswith("Reasoning:"):
        _header, separator, remainder = stripped.partition("\n\n")
        # Only drop the header when something follows it; otherwise the
        # text is returned unchanged.
        if separator:
            stripped = remainder.strip()

    return stripped
|