# colors_utils.py
"""
Continuous, lightness-first color utilities for LLM metric tables.

Changes vs. old version:
- Replaces 5-bin red→yellow→green with continuous ramps.
- Uses colorblind-friendlier palettes (teal sequential; purple↔teal diverging
  around a dark midpoint).
- Renders a soft in-cell "progress pill" instead of painting the whole cell.
- Keeps API compatible: `get_metric_color(score, metric)` and
  `df_to_colored_html(df)`.
"""

import colorsys
import math
from html import escape
from typing import Optional

import pandas as pd

# Color returned by `get_metric_color` for missing / non-finite scores.
_NEUTRAL_COLOR = "#2f3240"
# Column that is always rendered first when present.
_ID_COLUMN = "code_audio_transcription"
# Text color inside metric pills (kept neutral on the dark background).
_PILL_TEXT_COLOR = "#e6ebf2"
# Subtle inner background for the pill track.
_PILL_TRACK_BG = "#141a24"
# Alternating row backgrounds (even rows, odd rows).
_ROW_BACKGROUNDS = ("#0b0f14", "#0e131b")


# -------------------------------
# Color mapping helpers
# -------------------------------

def _clamp(x: float, lo: float, hi: float) -> float:
    """Return *x* limited to the closed interval [lo, hi]."""
    return lo if x < lo else hi if x > hi else x


def _finite_or_none(score) -> Optional[float]:
    """Coerce *score* to a finite float, or return None if that is impossible.

    None, NaN, ±inf and values that `float()` rejects (e.g. "n/a", pd.NA)
    all map to None so callers can treat them uniformly as "missing".
    """
    if score is None:
        return None
    try:
        value = float(score)
    except (TypeError, ValueError):
        return None
    return value if math.isfinite(value) else None


def _hsl_hex(h_deg: float, s: float, l: float) -> str:
    """Convert HSL (H in degrees, S and L in [0, 1]) to ``#rrggbb``.

    Note: colorsys uses HLS ordering, so the arguments are passed as
    (H, L, S). S and L are clamped to [0, 1] so the result is always a
    well-formed six-digit hex color.
    """
    h = (h_deg % 360.0) / 360.0
    rgb = colorsys.hls_to_rgb(h, _clamp(l, 0.0, 1.0), _clamp(s, 0.0, 1.0))
    # Truncation (not rounding) is kept so existing colors stay unchanged.
    channels = [int(_clamp(c, 0.0, 1.0) * 255) for c in rgb]
    return "#{:02x}{:02x}{:02x}".format(*channels)


def _seq_color_01(
    t: float,
    *,
    hue: float = 200.0,
    s: float = 0.55,
    l_min: float = 0.18,
    l_max: float = 0.56,
    gamma: float = 0.85,
) -> str:
    """Sequential ramp for metrics in [0, 1] (e.g. BLEU, ROUGE, BERTScore).

    Single hue (default 200°), with lightness varying smoothly from
    *l_min* at 0 to *l_max* at 1. Inputs outside [0, 1] are clamped.
    """
    t = _clamp(float(t), 0.0, 1.0) ** gamma
    lightness = l_min + (l_max - l_min) * t
    return _hsl_hex(hue, s, lightness)


def _div_color_m11(
    x: float,
    *,
    hue_lo: float = 280.0,
    hue_hi: float = 190.0,
    s: float = 0.55,
    l_mid: float = 0.24,
    l_span: float = 0.22,
    gamma: float = 0.9,
) -> str:
    """Diverging ramp for metrics in [-1, 1] (e.g. BLEURT).

    Negative values use *hue_lo* (purple) and get darker with magnitude;
    zero and positive values use *hue_hi* (teal) and get brighter. Both
    sides meet at lightness *l_mid*. Saturation is constant, so the hue
    switches at exactly zero rather than fading through gray.

    Gamma is applied to ``abs(x)`` so the curve is symmetric and the sign
    boundary sits exactly at 0. Inputs outside [-1, 1] are clamped.
    """
    x = _clamp(float(x), -1.0, 1.0)
    magnitude = abs(x) ** gamma
    if x < 0.0:
        # Toward negative: darker purple.
        return _hsl_hex(hue_lo, s, l_mid - l_span * magnitude)
    # Toward positive: brighter teal.
    return _hsl_hex(hue_hi, s, l_mid + l_span * magnitude)


def _norm_for_bar(metric: str, score: Optional[float]) -> float:
    """Normalize a score to [0, 1] for use as a bar width.

    BLEURT is treated as [-1, 1] and mapped linearly onto [0, 1]; every
    other metric is assumed to be in [0, 1] already and is only clamped.
    Missing or non-finite scores give 0.0.
    """
    value = _finite_or_none(score)
    if value is None:
        return 0.0
    if metric.upper() == "BLEURT":
        return _clamp((value + 1.0) / 2.0, 0.0, 1.0)
    return _clamp(value, 0.0, 1.0)


def get_metric_color(score: Optional[float], metric: str = "BLEU") -> str:
    """Public API: map a (metric, score) pair to a continuous ``#rrggbb`` color.

    - BLEURT uses the diverging purple↔teal ramp.
    - Every other metric uses the single-hue sequential ramp.

    Returns a neutral deep gray if *score* is None, not finite, or cannot
    be converted to a float.
    """
    value = _finite_or_none(score)
    if value is None:
        return _NEUTRAL_COLOR
    if metric.upper() == "BLEURT":
        return _div_color_m11(value)
    return _seq_color_01(value)


def _readable_text_on(bg_hex: str) -> str:
    """Choose near-black or white text for a ``#rrggbb`` background.

    Malformed input falls back to white.
    """
    try:
        r = int(bg_hex[1:3], 16) / 255.0
        g = int(bg_hex[3:5], 16) / 255.0
        b = int(bg_hex[5:7], 16) / 255.0
    except (TypeError, ValueError):
        # Default to white on bad input.
        return "#ffffff"
    # Relative luminance (approximate, using a plain 2.2 gamma).
    luminance = 0.2126 * (r ** 2.2) + 0.7152 * (g ** 2.2) + 0.0722 * (b ** 2.2)
    return "#0b0f14" if luminance > 0.5 else "#ffffff"


# -------------------------------
# HTML rendering for DataFrames
# -------------------------------

def _metric_from_col(col: str) -> Optional[str]:
    """Infer the metric family from a column name prefix (case-insensitive).

    Returns "BLEURT", "BLEU", "ROUGE", "BERTSCORE", or None for
    non-metric columns. Non-string labels are converted with `str()`.
    """
    name = str(col).lower()
    if name.startswith("bleurt_"):
        return "BLEURT"
    if name.startswith("bleu_"):
        return "BLEU"
    if name.startswith("rouge"):
        return "ROUGE"
    if name.startswith(("bertscore_", "bert_score")):
        return "BERTSCORE"
    return None


def _fmt_value(val) -> str:
    """Format a cell value: None → "", floats → 4 decimals, else `str()`."""
    if val is None:
        return ""
    if isinstance(val, float):
        return f"{val:.4f}"
    # Anything else (e.g. the audio id) is shown as-is.
    return str(val)


# NOTE(review): the tags and inline styles in the three helpers below were
# reconstructed; the original markup was not recoverable from the source.
# Confirm the exact styling against the intended design.

def _header_cell(label) -> str:
    """Return one escaped <th> cell for the header row."""
    return (
        '<th style="text-align:left;padding:8px 10px;'
        'border-bottom:1px solid #1f2733;white-space:nowrap;">'
        f"{escape(str(label))}</th>"
    )


def _plain_cell(disp: str) -> str:
    """Return a plain <td> for a non-metric column (*disp* already escaped)."""
    return (
        "<td style='padding:6px 10px;border-bottom:1px solid #161c26;'>"
        f"{disp}</td>"
    )


def _metric_cell(metric: str, val, disp: str) -> str:
    """Return a <td> holding a "progress pill" (*disp* already escaped).

    Missing or non-numeric values get a transparent, zero-width bar.
    """
    score = _finite_or_none(val)
    if score is None:
        color, width01 = "transparent", 0.0
    else:
        color = get_metric_color(score, metric)
        width01 = _norm_for_bar(metric, score)
    bar_width_pct = f"{width01 * 100:.1f}%"
    return (
        '<td style="padding:6px 10px;border-bottom:1px solid #161c26;">'
        '<div style="position:relative;min-width:90px;height:20px;'
        f'border-radius:10px;background:{_PILL_TRACK_BG};overflow:hidden;">'
        '<div style="position:absolute;left:0;top:0;bottom:0;'
        f'width:{bar_width_pct};background:{color};border-radius:10px;"></div>'
        '<span style="position:relative;display:block;padding:0 8px;'
        f'line-height:20px;color:{_PILL_TEXT_COLOR};">{disp}</span>'
        "</div></td>"
    )


def df_to_colored_html(df: pd.DataFrame) -> str:
    """Render *df* as a dark-themed HTML table with metric "progress pills".

    Columns whose name identifies a metric (see `_metric_from_col`) get a
    small colored bar behind the value; all other columns are plain cells.
    The 'code_audio_transcription' column is moved first when present.
    Header labels and cell values are HTML-escaped. *df* is not modified.

    Returns the complete HTML fragment as a single string.
    """
    # Column order: ensure the id column comes first if present.
    headers = list(df.columns)
    if _ID_COLUMN in headers:
        headers = [_ID_COLUMN] + [h for h in headers if h != _ID_COLUMN]

    # Table shell.
    parts = [
        '<div style="overflow-x:auto;background:#0b0f14;'
        'border-radius:10px;padding:8px;">',
        '<table style="border-collapse:collapse;width:100%;'
        'font-family:system-ui,sans-serif;font-size:13px;color:#e6ebf2;">',
        "<thead><tr>",
    ]

    # Header row.
    parts.extend(_header_cell(h) for h in headers)
    parts.append("</tr></thead><tbody>")

    # Body rows with alternating backgrounds.
    for i, (_, row) in enumerate(df.iterrows()):
        row_bg = _ROW_BACKGROUNDS[i % 2]
        parts.append(f"<tr style='background:{row_bg};'>")
        for col in headers:
            val = row.get(col, None)
            disp = escape(_fmt_value(val))
            metric = _metric_from_col(col)
            if metric is None:
                parts.append(_plain_cell(disp))
            else:
                parts.append(_metric_cell(metric, val, disp))
        parts.append("</tr>")

    parts.append("</tbody></table></div>")
    return "".join(parts)