Spaces:
Sleeping
Sleeping
| """Utility functions for backend processing.""" | |
| import html | |
| import re | |
def clean_model_output(content: str) -> str:
    """
    Clean LLM output by removing reasoning artifacts.

    Removes:
    1. <think>...</think> blocks (any letter case, may span lines)
    2. HTML-escaped tags (&lt;think&gt; / &lt;/think&gt;), by unescaping first
    3. A leading "Reasoning:" header paragraph
    4. Malformed/cut-off tags: an orphan </think> (everything up to and
       including the last one is dropped) and an unclosed <think>
       (everything from the tag to the end is dropped)

    Args:
        content: Raw LLM response string

    Returns:
        Cleaned response string with surrounding whitespace stripped;
        "" when the input is empty or nothing but reasoning remains.
    """
    if not content:
        return ""

    flags = re.DOTALL | re.IGNORECASE

    # 1. Unescape HTML entities so escaped tags match the patterns below.
    # NOTE: this unescapes every entity in the response, not only the tags.
    content = html.unescape(content)

    # 2. Remove complete <think> blocks (non-greedy, so each block is
    #    removed on its own and text between blocks is kept).
    content = re.sub(r'<think>.*?</think>', '', content, flags=flags)

    # 3a. Orphan closing tag (opening tag was cut off): keep only the text
    #     after the last one. Case-insensitive, consistent with step 2.
    content = re.split(r'</think>', content, flags=re.IGNORECASE)[-1]

    # 3b. Unclosed opening tag (output truncated mid-reasoning): drop the
    #     tag and everything after it.
    content = re.sub(r'<think>.*', '', content, flags=flags)

    # 4. Remove a leading "Reasoning:" paragraph. Leading whitespace is
    #    stripped first so the paragraph break that is found is the one
    #    that ends the header, not blank lines left in front of it.
    content = content.lstrip()
    if content.startswith("Reasoning:"):
        _header, separator, remainder = content.partition("\n\n")
        if separator:
            content = remainder

    return content.strip()