Spaces:

itslikethisnow
/

rubric-ai

Sleeping

rubric-ai / backend /core /utils.py

initial commit

dccc925 26 days ago

1.11 kB

	"""Utility functions for backend processing."""

	import html
	import re


	def clean_model_output(content: str) -> str:
	    """Clean LLM output by removing reasoning artifacts.

	    Removes:
	        1. <think>...</think> blocks (as emitted by DeepSeek models),
	           matched case-insensitively and across multiple lines.
	        2. HTML-escaped variants of those tags (&lt;think&gt;), by
	           unescaping HTML entities first. Note that this unescapes every
	           entity in the response, not only the tags.
	        3. Everything up to and including a dangling </think> whose opening
	           tag is missing (cut-off or malformed output).
	        4. A leading "Reasoning:" section, up to the first blank line.

	    Args:
	        content: Raw LLM response string. Any falsy value (empty string,
	            None) yields an empty string.

	    Returns:
	        The cleaned response with surrounding whitespace stripped.
	    """
	    if not content:
	        return ""

	    # 1. Unescape HTML entities so escaped tags look like real tags below.
	    content = html.unescape(content)

	    # 2. Remove complete <think> blocks. Non-greedy so that several blocks
	    #    are removed individually; DOTALL lets a block span multiple lines.
	    content = re.sub(
	        r'<think>.*?</think>',
	        '',
	        content,
	        flags=re.DOTALL | re.IGNORECASE,
	    )

	    # 3. A closing tag that survived step 2 has no matching opener; keep
	    #    only the text after the last one. Case-insensitive so this agrees
	    #    with the pattern used in step 2.
	    content = re.split(r'</think>', content, flags=re.IGNORECASE)[-1]

	    # 4. Drop a leading "Reasoning:" section. Work on the stripped text:
	    #    removing a <think> block usually leaves leading blank lines, and
	    #    splitting the unstripped string would cut at those instead of at
	    #    the end of the header paragraph.
	    content = content.strip()
	    if content.startswith("Reasoning:"):
	        _, separator, remainder = content.partition("\n\n")
	        # Without a blank line there is no body to keep, so leave as-is.
	        if separator:
	            content = remainder

	    return content.strip()