# metrics/bleu.py
"""BLEU metric wrappers using sacreBLEU and file_utils."""

from utils.file_utils import normalize_and_flatten
from sacrebleu.metrics import BLEU

# Global BLEU scorer: 'intl' tokenization, lowercasing, exponential smoothing.
_bleu_scorer = BLEU(tokenize='intl', lowercase=True, smooth_method='exp')


def section_bleu(gen_txt: str, ref_txt: str) -> float:
    """Compute BLEU for one (generated, reference) section pair.

    Returns a score in [0, 100]. Edge cases: both strings blank -> 100.0
    (trivially identical); exactly one blank -> 0.0 (nothing to match).
    """
    gen_empty = not gen_txt.strip()
    ref_empty = not ref_txt.strip()
    if gen_empty and ref_empty:
        return 100.0
    if gen_empty != ref_empty:  # exactly one side is blank
        return 0.0
    return _bleu_scorer.sentence_score(gen_txt, [ref_txt]).score


def full_bleu(gen_raw: str, ref_raw: str) -> float:
    """Compute a global BLEU score for two complete raw strings.

    Both inputs are normalized/flattened via ``normalize_and_flatten``
    before scoring. Returns a score in [0, 100], with the same blank-input
    conventions as :func:`section_bleu`.
    """
    gen = normalize_and_flatten(gen_raw)
    ref = normalize_and_flatten(ref_raw)
    if not gen and not ref:
        return 100.0
    if (not gen) != (not ref):  # exactly one side is empty after normalization
        return 0.0
    return _bleu_scorer.sentence_score(gen, [ref]).score


def compute_bleu_single(reference: str, prediction: str) -> str:
    """Compute and format the BLEU score for a single (reference, prediction) pair.

    Returns a user-facing message: either a prompt when an input is missing,
    or the BLEU score rescaled from [0, 100] to [0, 1], formatted to 4 decimals.
    """
    if not reference or not prediction:
        return "Please provide both texts."
    score = full_bleu(prediction, reference) / 100.0
    return f"BLEU Score: {score:.4f}"