| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import json |
| import os |
| from typing import Dict, Any |
|
|
| from engine.parser_rules import parse_text_rules |
| from engine.parser_ext import parse_text_extended |
|
|
|
|
| |
# Path of the gold test file that run_parser_eval() opens and parses as JSON.
# It is a relative path, so it is resolved against the current working
# directory of the process.
GOLD_PATH = "training/gold_tests.json"
|
|
|
|
def _values_match(got: Any, expected: Any) -> bool:
    """Return True when a parsed value matches its gold value.

    Two strings are compared case-insensitively. Any other pairing falls
    back to plain equality, so non-string values (numbers, ``None``, ...)
    are compared instead of raising ``AttributeError`` on ``.lower()``.
    """
    if isinstance(got, str) and isinstance(expected, str):
        return got.lower() == expected.lower()
    return got == expected


def evaluate_single_test(test: Dict[str, Any]) -> Dict[str, Any]:
    """
    Evaluate one gold test with rules + extended parsers.

    Args:
        test: One gold test case. ``test["input"]`` is the text handed to
            both parsers (defaults to ``""``) and ``test["expected"]`` maps
            field names to gold values (a missing or null entry is treated
            as empty).

    Returns:
        A dict with:
            ``correct``  - number of expected fields that earned credit,
            ``total``    - number of expected fields,
            ``accuracy`` - ``correct / total`` (``0`` when there are no
                           expected fields),
            ``wrong``    - ``{field: {"expected": ..., "got": ...}}`` for
                           every mismatching field,
            ``merged``   - the combined parser output that was scored.
    """
    text = test.get("input", "")
    # `or {}` also covers an explicit `"expected": null` in the gold file.
    expected = test.get("expected") or {}

    rule_out = parse_text_rules(text).get("parsed_fields", {})
    ext_out = parse_text_extended(text).get("parsed_fields", {})

    # Start from the rule-based fields; an extended-parser value overrides
    # them unless it is the "Unknown" placeholder.
    merged = dict(rule_out)
    for key, value in ext_out.items():
        if value != "Unknown":
            merged[key] = value

    total = len(expected)
    correct = 0
    wrong = {}

    for field, exp_val in expected.items():
        # A field absent from the merged output is scored as "Unknown".
        got = merged.get(field, "Unknown")
        if not _values_match(got, exp_val):
            wrong[field] = {"expected": exp_val, "got": got}
        elif exp_val != "Unknown":
            # A match on the literal "Unknown" earns no credit, yet the
            # field is still counted in `total`.
            # NOTE(review): confirm this asymmetry is the intended scoring.
            correct += 1

    return {
        "correct": correct,
        "total": total,
        "accuracy": correct / total if total else 0,
        "wrong": wrong,
        "merged": merged,
    }
|
|
|
|
def run_parser_eval(mode: str = "rules_extended") -> Dict[str, Any]:
    """
    Evaluate ALL gold tests using rules + extended parsing only.

    Args:
        mode: Accepted for interface compatibility; this function does not
            read it. The summary always reports ``"rules+extended"``.

    Returns:
        ``{"error": ...}`` when ``GOLD_PATH`` does not exist. Otherwise a
        summary dict with:
            ``mode``             - always ``"rules+extended"``,
            ``tests``            - number of gold tests loaded,
            ``total_correct``    - sum of per-test ``correct`` counts,
            ``total_fields``     - sum of per-test ``total`` counts,
            ``overall_accuracy`` - ``total_correct / total_fields`` (``0``
                                   when no fields were scored),
            ``wrong_cases``      - one entry per test with at least one
                                   mismatching field, holding the test name,
                                   the mismatches, the merged parser output
                                   and the expected values.
    """
    if not os.path.exists(GOLD_PATH):
        return {"error": f"Gold test file not found at {GOLD_PATH}"}

    with open(GOLD_PATH, "r", encoding="utf-8") as f:
        gold = json.load(f)

    wrong_cases = []
    total_correct = 0
    total_fields = 0

    for test in gold:
        out = evaluate_single_test(test)

        total_correct += out["correct"]
        total_fields += out["total"]

        # Only tests with at least one mismatch are reported in detail.
        if out["wrong"]:
            wrong_cases.append({
                "name": test.get("name", "Unnamed"),
                "wrong": out["wrong"],
                "parsed": out["merged"],
                "expected": test.get("expected", {}),
            })

    return {
        "mode": "rules+extended",
        "tests": len(gold),
        "total_correct": total_correct,
        "total_fields": total_fields,
        "overall_accuracy": total_correct / total_fields if total_fields else 0,
        "wrong_cases": wrong_cases,
    }
|
|