Spaces:
Sleeping
Sleeping
| # metrics/bleu.py | |
| """ | |
| BLEU metric wrappers using sacreBLEU and file_utils. | |
| """ | |
| from utils.file_utils import * | |
| from sacrebleu.metrics import BLEU | |
# Module-wide BLEU scorer: 'intl' tokenization, lowercasing and 'exp' smoothing.
# It is only used through sentence_score(), so effective_order=True is enabled
# as sacreBLEU recommends for sentence-level BLEU: without it, a hypothesis
# with fewer tokens than the maximum n-gram order scores 0 even when it
# matches the reference exactly.
_bleu_scorer = BLEU(
    tokenize='intl',
    lowercase=True,
    smooth_method='exp',
    effective_order=True,
)
def section_bleu(gen_txt: str, ref_txt: str) -> float:
    """
    Score one generated section against its reference section with BLEU.

    Text that is empty after ``strip()`` counts as blank. Two blank sections
    score 100.0 and a blank section paired with a non-blank one scores 0.0.
    Any other pair is passed to the module-level scorer's ``sentence_score``
    and the ``.score`` of the result is returned.
    """
    gen_blank = not gen_txt.strip()
    ref_blank = not ref_txt.strip()
    if gen_blank or ref_blank:
        # At least one side has no content, so the scorer is not consulted.
        return 100.0 if gen_blank and ref_blank else 0.0
    result = _bleu_scorer.sentence_score(gen_txt, [ref_txt])
    return result.score
def full_bleu(gen_raw: str, ref_raw: str) -> float:
    """
    Score a complete generated text against a complete reference with BLEU.

    Both inputs are first passed through ``normalize_and_flatten``. If both
    results are empty the score is 100.0; if exactly one is empty the score
    is 0.0. Otherwise the two normalized texts are passed to the module-level
    scorer's ``sentence_score`` and the ``.score`` of the result is returned.
    """
    hypothesis = normalize_and_flatten(gen_raw)
    reference = normalize_and_flatten(ref_raw)
    hyp_empty = not hypothesis
    ref_empty = not reference
    if hyp_empty and ref_empty:
        return 100.0
    # Both-empty was handled above, so this only fires when exactly one is empty.
    if hyp_empty or ref_empty:
        return 0.0
    return _bleu_scorer.sentence_score(hypothesis, [reference]).score
def compute_bleu_single(reference: str, prediction: str) -> str:
    """
    Compute the BLEU score for a single reference/prediction pair and format it.

    Args:
        reference: The reference text.
        prediction: The text to score against the reference.

    Returns:
        ``"Please provide both texts."`` when either argument is missing,
        empty, or whitespace-only. Otherwise ``"BLEU Score: <value>"``, where
        the value is the result of ``full_bleu`` divided by 100 and shown
        with four decimal places.
    """
    # Whitespace-only input is treated as a missing text instead of being
    # scored, so a stray space does not yield a misleading BLEU value.
    reference_given = bool(reference and reference.strip())
    prediction_given = bool(prediction and prediction.strip())
    if not (reference_given and prediction_given):
        return "Please provide both texts."
    # full_bleu takes the generated text first and the reference second.
    score = full_bleu(prediction, reference) / 100.0
    return f"BLEU Score: {score:.4f}"