Spaces:

gjoliveira
/

data-ai-llm-eval-app

Sleeping

Guilherme

Deploy to HF Space

4b112ae 4 months ago

1.4 kB

	# metrics/bleu.py
	"""
	BLEU metric wrappers using sacreBLEU and file_utils.
	"""
	from utils.file_utils import *
	from sacrebleu.metrics import BLEU

	# Module-wide BLEU scorer used by section_bleu and full_bleu, configured
	# with 'intl' tokenization, lowercasing, and 'exp' smoothing.
	# NOTE(review): this scorer is only called through sentence_score() in this
	# file; sacreBLEU recommends effective_order=True for sentence-level BLEU.
	# Enabling it would change reported scores - confirm before switching.
	_bleu_scorer = BLEU(tokenize='intl', lowercase=True, smooth_method='exp')

	def section_bleu(gen_txt: str, ref_txt: str) -> float:
	    """
	    Compute BLEU for one generated/reference section pair.

	    Args:
	        gen_txt: Generated section text. ``None`` is treated as empty.
	        ref_txt: Reference section text. ``None`` is treated as empty.

	    Returns:
	        100.0 when both texts are blank (empty or whitespace-only), 0.0 when
	        exactly one of them is blank, otherwise the ``.score`` of
	        ``_bleu_scorer.sentence_score`` for the pair (0 to 100).
	    """
	    # Decide blankness once per argument. `or ""` keeps a missing (None)
	    # section from raising AttributeError on .strip().
	    gen_blank = not (gen_txt or "").strip()
	    ref_blank = not (ref_txt or "").strip()
	    if gen_blank and ref_blank:
	        # Two blank sections are considered a perfect match.
	        return 100.0
	    if gen_blank or ref_blank:
	        # Only one side has content: nothing can match.
	        return 0.0
	    # Both sides are non-blank strings here; score the texts as given.
	    return _bleu_scorer.sentence_score(gen_txt, [ref_txt]).score

	def full_bleu(gen_raw: str, ref_raw: str) -> float:
	    """
	    Compute BLEU over two complete texts, returning a score from 0 to 100.

	    Both inputs are first passed through ``normalize_and_flatten``. If both
	    normalized values are empty the result is 100.0; if only one is empty the
	    result is 0.0; otherwise the pair is scored with
	    ``_bleu_scorer.sentence_score``.
	    """
	    hypothesis, reference = normalize_and_flatten(gen_raw), normalize_and_flatten(ref_raw)
	    if hypothesis and reference:
	        return _bleu_scorer.sentence_score(hypothesis, [reference]).score
	    # At least one side is empty: a lone empty side scores 0, two empty
	    # sides count as a perfect match.
	    return 0.0 if (hypothesis or reference) else 100.0


	def compute_bleu_single(reference: str, prediction: str) -> str:
	    """
	    Compute BLEU for a single reference/prediction pair and format it.

	    Args:
	        reference: Reference text.
	        prediction: Predicted (generated) text.

	    Returns:
	        ``"Please provide both texts."`` when either input is missing, empty,
	        or whitespace-only; otherwise ``"BLEU Score: <value>"`` where the
	        value is ``full_bleu``'s result divided by 100, shown with four
	        decimal places.
	    """
	    # Whitespace-only input carries no text either, so it gets the same
	    # prompt as an empty or missing value instead of being scored.
	    if not (reference or "").strip() or not (prediction or "").strip():
	        return "Please provide both texts."
	    # full_bleu takes the generated text first, then the reference.
	    score = full_bleu(prediction, reference) / 100.0
	    return f"BLEU Score: {score:.4f}"