# Source: Hugging Face Space gjoliveira/data-ai-llm-eval-app (status: Sleeping)
# File size: 8,036 Bytes

# colors_utils.py
"""

Continuous, lightness-first color utilities for LLM metric tables.



Changes vs. old version:

- Replaces 5-bin red→yellow→green with continuous ramps.

- Uses colorblind-friendlier palettes (teal sequential; purple↔gray↔teal diverging).

- Renders a soft in-cell "progress pill" instead of painting the whole cell.

- Keeps API compatible: `get_metric_color(score, metric)` and `df_to_colored_html(df)`.

"""

import colorsys
import html
import math
import numbers
from typing import Optional

import pandas as pd

# -------------------------------
# Color mapping helpers
# -------------------------------

def _clamp(x: float, lo: float, hi: float) -> float:
    """Return ``x`` limited to the closed interval [``lo``, ``hi``].

    The lower bound is tested first, so it wins if the bounds are inverted.
    """
    if x < lo:
        return lo
    if x > hi:
        return hi
    return x

def _hsl_hex(h_deg: float, s: float, l: float) -> str:
    """Convert an HSL color to a ``#rrggbb`` hex string.

    Args:
        h_deg: Hue in degrees; any real value is accepted and wrapped
            into [0, 360).
        s: Saturation; values outside [0, 1] are clamped.
        l: Lightness; values outside [0, 1] are clamped.

    Returns:
        A 7-character lowercase hex color such as ``"#1f7a8c"``.

    Note:
        ``colorsys`` orders its arguments as HLS (hue, lightness,
        saturation), so lightness and saturation are swapped in the call.
    """
    hue = (h_deg % 360.0) / 360.0
    # Out-of-range saturation/lightness would make colorsys return channel
    # values outside [0, 1], which format as malformed hex (three digits
    # or a leading minus sign), so clamp before converting.
    sat = min(1.0, max(0.0, s))
    light = min(1.0, max(0.0, l))
    channels = colorsys.hls_to_rgb(hue, light, sat)
    # Truncation (not rounding) is kept so in-range colors are unchanged;
    # the final clamp guarantees two hex digits per channel.
    return "#" + "".join(
        "{:02x}".format(min(255, max(0, int(c * 255)))) for c in channels
    )

def _seq_color_01(t: float, *, hue: float = 200.0, s: float = 0.55,
                  l_min: float = 0.18, l_max: float = 0.56, gamma: float = 0.85) -> str:
    """Map a score in [0, 1] onto a single-hue sequential ramp.

    The input is clamped to [0, 1], eased with ``gamma``, and used to
    interpolate lightness linearly between ``l_min`` and ``l_max``; hue and
    saturation stay fixed (the default hue of 200 degrees is a teal).
    Intended for metrics such as BLEU, ROUGE and BERTScore.

    Returns:
        The resulting color as a ``#rrggbb`` string.
    """
    position = _clamp(float(t), 0.0, 1.0)
    eased = position ** gamma
    lightness = l_min + (l_max - l_min) * eased
    return _hsl_hex(hue, s, lightness)

def _div_color_m11(x: float, *, hue_lo: float = 280.0, hue_hi: float = 190.0,
                   s: float = 0.55, l_mid: float = 0.24, l_span: float = 0.22,
                   gamma: float = 0.9) -> str:
    """Map a score in [-1, 1] onto a diverging purple/gray/teal ramp.

    Negative scores use ``hue_lo`` (purple) and get darker as they approach
    -1; non-negative scores use ``hue_hi`` (teal) and get lighter as they
    approach +1. Saturation is scaled by the eased magnitude, so a score of
    exactly 0 renders as a neutral gray at lightness ``l_mid``.

    Args:
        x: Score; values outside [-1, 1] are clamped.
        hue_lo: Hue in degrees for negative scores.
        hue_hi: Hue in degrees for non-negative scores.
        s: Saturation reached at the extremes (|x| == 1).
        l_mid: Lightness at x == 0.
        l_span: Lightness distance from ``l_mid`` reached at |x| == 1.
        gamma: Easing exponent applied to |x|.

    Returns:
        The resulting color as a ``#rrggbb`` string.
    """
    value = _clamp(float(x), -1.0, 1.0)
    # Ease the *magnitude* rather than the rescaled [0, 1] position: applying
    # gamma before splitting at 0.5 moves the neutral point away from x == 0
    # and colors slightly negative scores with the positive hue.
    magnitude = abs(value) ** gamma
    if value < 0.0:
        hue = hue_lo
        lightness = l_mid - l_span * magnitude
    else:
        hue = hue_hi
        lightness = l_mid + l_span * magnitude
    # Fading saturation toward zero gives the neutral gray midpoint and
    # avoids an abrupt purple-to-teal jump between adjacent scores.
    return _hsl_hex(hue, s * magnitude, lightness)

def _norm_for_bar(metric: str, score: Optional[float]) -> float:
    """Normalize a metric score to a bar-width fraction in [0, 1].

    Args:
        metric: Metric name, compared case-insensitively. ``"BLEURT"`` is
            treated as ranging over [-1, 1]; every other metric is assumed
            to already lie in roughly [0, 1].
        score: The raw score. ``None``, non-numeric and non-finite values
            (NaN, +/-inf) yield an empty bar.

    Returns:
        A float in [0, 1].
    """
    if score is None:
        return 0.0
    # Convert before testing finiteness so NaN/inf held in non-builtin
    # numeric types (e.g. numpy.float32) are caught as well.
    try:
        value = float(score)
    except (TypeError, ValueError):
        return 0.0
    if not math.isfinite(value):
        return 0.0
    if metric.upper() == "BLEURT":
        # Shift [-1, 1] onto [0, 1].
        return _clamp((value + 1.0) / 2.0, 0.0, 1.0)
    return _clamp(value, 0.0, 1.0)

def get_metric_color(score: Optional[float], metric: str = "BLEU") -> str:
    """Public API: map a (metric, score) pair to a continuous color.

    Args:
        score: The metric value. ``None``, non-numeric and non-finite values
            map to a neutral deep gray.
        metric: Metric name, compared case-insensitively. ``"BLEURT"`` uses
            the diverging ramp; any other name uses the sequential ramp.

    Returns:
        A ``#rrggbb`` color string.
    """
    neutral = "#2f3240"
    if score is None:
        return neutral
    # Convert first so the finiteness check also covers numeric types that
    # are not subclasses of the builtin float (e.g. numpy.float32); those
    # would otherwise reach the ramps as NaN and fail during hex conversion.
    try:
        value = float(score)
    except (TypeError, ValueError):
        return neutral
    if not math.isfinite(value):
        return neutral
    if metric.upper() == "BLEURT":
        return _div_color_m11(value)
    return _seq_color_01(value)

def _readable_text_on(bg_hex: str) -> str:
    """Pick the text color (dark or white) with the higher contrast on a background.

    The choice compares WCAG contrast ratios of the two candidate text
    colors against the background instead of using a fixed luminance
    cut-off, so mid-tone backgrounds get the genuinely more legible option.

    Args:
        bg_hex: Background color as ``"#RRGGBB"``; trailing characters
            (such as an alpha component) are ignored.

    Returns:
        ``"#0b0f14"`` (dark text) or ``"#ffffff"`` (white text). White is
        returned for input that is not a valid hex color.
    """
    dark_text = "#0b0f14"
    light_text = "#ffffff"

    def _luminance(hex_color: str) -> float:
        # WCAG relative luminance: linearize each sRGB channel, then weight.
        linear = []
        for start in (1, 3, 5):
            c = int(hex_color[start:start + 2], 16) / 255.0
            linear.append(c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4)
        return 0.2126 * linear[0] + 0.7152 * linear[1] + 0.0722 * linear[2]

    def _contrast(lum_a: float, lum_b: float) -> float:
        hi, lo = (lum_a, lum_b) if lum_a >= lum_b else (lum_b, lum_a)
        return (hi + 0.05) / (lo + 0.05)

    # Validate explicitly rather than relying on a broad except clause, so
    # strings like "#fff" or "ffffff" are rejected instead of being
    # half-parsed into a meaningless color.
    if not isinstance(bg_hex, str) or not bg_hex.startswith("#"):
        return light_text
    digits = bg_hex[1:7]
    if len(digits) != 6 or any(ch not in "0123456789abcdefABCDEF" for ch in digits):
        return light_text

    bg_lum = _luminance(bg_hex)
    dark_contrast = _contrast(bg_lum, _luminance(dark_text))
    light_contrast = _contrast(bg_lum, _luminance(light_text))
    return dark_text if dark_contrast >= light_contrast else light_text

# -------------------------------
# HTML rendering for DataFrames
# -------------------------------

def _metric_from_col(col: str) -> Optional[str]:
    """Infer the metric family from a column name prefix.

    Matching is case-insensitive. Recognized prefixes are ``bleurt_``,
    ``bleu_``, ``rouge`` and ``bertscore_`` / ``bert_score``.

    Args:
        col: Column label. Non-string labels (e.g. integer column names)
            are converted with ``str`` before matching.

    Returns:
        ``"BLEURT"``, ``"BLEU"``, ``"ROUGE"`` or ``"BERTSCORE"``, or
        ``None`` when the column is not a metric column.
    """
    name = str(col).lower()
    prefix_table = (
        (("bleurt_",), "BLEURT"),
        (("bleu_",), "BLEU"),
        # "rouge" already covers "rougel_", "rouge1_", etc.
        (("rouge",), "ROUGE"),
        (("bertscore_", "bert_score"), "BERTSCORE"),
    )
    for prefixes, label in prefix_table:
        if name.startswith(prefixes):
            return label
    return None

def _fmt_value(val) -> str:
    """Format a cell value for display.

    Args:
        val: Any cell value.

    Returns:
        ``""`` for missing values (``None`` or a floating-point NaN), a
        4-decimal string for floating-point numbers (including NumPy float
        scalars), and ``str(val)`` for everything else, so integers,
        booleans and identifiers are shown unchanged.
    """
    if val is None:
        return ""
    # NumPy float scalars are registered as numbers.Real but are not all
    # subclasses of the builtin float, so test the abstract type; integral
    # types (including bool) are excluded to keep ids and counts intact.
    if isinstance(val, numbers.Real) and not isinstance(val, numbers.Integral):
        number = float(val)
        if math.isnan(number):
            # NaN is how pandas represents a missing number; render it the
            # same way as None.
            return ""
        return f"{number:.4f}"
    return str(val)

def _finite_float_or_none(val) -> Optional[float]:
    """Return ``val`` as a finite float, or ``None`` if it is missing or not numeric."""
    if val is None:
        return None
    try:
        number = float(val)
    except (TypeError, ValueError):
        return None
    return number if math.isfinite(number) else None


def df_to_colored_html(df: pd.DataFrame) -> str:
    """Render a DataFrame as a dark-themed HTML table with metric "progress pills".

    Metric columns (detected from the column name) show their value on top
    of a small colored bar whose width and color encode the score; all other
    columns are rendered as plain text cells. The table background stays
    neutral so that color is confined to the bars.

    Args:
        df: The table to render. It is not modified. If a
            ``code_audio_transcription`` column is present it is moved to
            the front; the remaining columns keep their order.

    Returns:
        An HTML fragment (a ``<div>`` wrapping a ``<table>``). Header labels
        and cell values are HTML-escaped.
    """
    id_col = "code_audio_transcription"
    labels = list(df.columns)
    # Stable sort on a boolean key: the id column moves to the front while
    # everything else keeps its relative order. Working with positions (not
    # labels) keeps this well-defined even with duplicate column names.
    order = sorted(range(len(labels)), key=lambda pos: labels[pos] != id_col)
    headers = [labels[pos] for pos in order]
    # Metric detection depends only on the column, so do it once per column
    # instead of once per cell.
    metrics = [_metric_from_col(str(h)) for h in headers]

    # Table shell
    parts = [
        '<div style="font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial;'
        ' font-size:14px; color:#e6ebf2;">',
        '<table style="border-collapse:separate; border-spacing:0; width:100%; background:#0b0f14;'
        ' border:1px solid #202532; border-radius:10px; overflow:hidden;">'
        "<thead><tr>"
    ]

    # Header row
    for h in headers:
        # Escape labels: column names come from data and must not be able to
        # inject markup.
        label = html.escape(str(h))
        parts.append(
            f'<th style="position:sticky; top:0; background:#121722; color:#c9d4e3;'
            f' padding:10px 12px; border-bottom:1px solid #1b2030; text-align:center;'
            f' font-weight:600; white-space:nowrap;">{label}</th>'
        )
    parts.append("</tr></thead><tbody>")

    # Rows
    row_bg_a = "#0b0f14"
    row_bg_b = "#0e131b"
    text_color = "#e6ebf2"  # keep neutral text on dark background
    track_bg = "#141a24"  # subtle inner background for the pill track

    # itertuples keeps each column's own dtype; iterrows would build one
    # Series per row and upcast e.g. integer ids to floats ("3.0000") when
    # all columns are numeric.
    rows = df.iloc[:, order].itertuples(index=False, name=None)
    for i, values in enumerate(rows):
        row_bg = row_bg_a if (i % 2 == 0) else row_bg_b
        parts.append(f"<tr style='background:{row_bg};'>")

        for metric, val in zip(metrics, values):
            # Escape cell text so transcription/LLM output containing "<" or
            # "&" is displayed literally rather than interpreted as HTML.
            disp = html.escape(_fmt_value(val))

            if metric is None:
                # Non-metric column: plain cell
                parts.append(
                    "<td style='padding:10px 14px; border-bottom:1px solid #121722;"
                    " text-align:center; color:#e6ebf2; white-space:nowrap;'>"
                    f"{disp}</td>"
                )
                continue

            # Metric column: compute bar + color. Missing, non-numeric or
            # non-finite values get an empty, transparent bar instead of
            # raising during float conversion.
            score = _finite_float_or_none(val)
            if score is None:
                color = "transparent"
                width01 = 0.0
            else:
                color = get_metric_color(score, metric)
                width01 = _norm_for_bar(metric, score)
            bar_width_pct = f"{width01 * 100:.1f}%"

            cell_html = f"""

            <td style="padding:10px 14px; border-bottom:1px solid #121722; text-align:center; white-space:nowrap;">

              <div style="position:relative; height:22px; border-radius:7px; background:{track_bg}; overflow:hidden;">

                <div style="position:absolute; inset:0; width:{bar_width_pct}; background:{color}; opacity:.75;"></div>

                <div style="position:relative; z-index:1; line-height:22px; color:{text_color};">{disp}</div>

              </div>

            </td>

            """
            parts.append(cell_html)

        parts.append("</tr>")

    parts.append("</tbody></table></div>")
    return "".join(parts)