iljung1106 committed
Commit 5570c3c · Parent: 39e77fe

combined classify and analyze

Files changed (2):
  1. app/visualization.py  +28 -34
  2. webui_gradio.py  +57 -102
app/visualization.py CHANGED
@@ -261,41 +261,35 @@ def analyze_views(
     )
 
 
-def format_analysis_text(analysis: ViewAnalysis) -> str:
-    """Format analysis results as markdown text."""
-    lines = ["## 📊 View & Branch Analysis\n"]
-
-    # View weights
-    lines.append("### View Attention Weights")
-    lines.append("How much each view contributed to the final embedding:\n")
+def format_view_weights_html(analysis: ViewAnalysis) -> str:
+    """Format view weights as clean HTML with styled progress bars."""
+    # View labels with descriptions (eye = singular)
+    view_info = {
+        "whole": ("Whole Image", "#4CAF50"),  # green
+        "face": ("Face", "#2196F3"),  # blue
+        "eyes": ("Eye Region", "#FF9800"),  # orange
+    }
+
+    html_parts = ['<div style="font-family: sans-serif; padding: 10px;">']
+    html_parts.append('<h3 style="margin-bottom: 15px;">📊 View Contribution</h3>')
+
     for k in ("whole", "face", "eyes"):
         w = analysis.view_weights.get(k, 0.0)
-        bar_len = int(w * 20)
-        bar = "█" * bar_len + "░" * (20 - bar_len)
-        lines.append(f"- **{k.capitalize()}**: `{bar}` {w:.1%}")
-
-    lines.append("")
-
-    # Branch weights per view
-    lines.append("### Branch Attention Weights (per view)")
-    lines.append("Which style features were most important:\n")
-    branch_names = ["Gram", "Cov", "Spectrum", "Stats"]
-    branch_desc = {
-        "Gram": "texture patterns",
-        "Cov": "color correlations",
-        "Spectrum": "frequency content",
-        "Stats": "mean/variance",
-    }
-
-    for view_name in ("whole", "face", "eyes"):
-        bw = analysis.branch_weights.get(view_name, {})
-        if bw:
-            lines.append(f"\n**{view_name.capitalize()}**:")
-            for b in branch_names:
-                w = bw.get(b, 0.0)
-                bar_len = int(w * 15)
-                bar = "▓" * bar_len + "░" * (15 - bar_len)
-                lines.append(f" - {b} ({branch_desc[b]}): `{bar}` {w:.1%}")
-
-    return "\n".join(lines)
+        label, color = view_info[k]
+        pct = int(w * 100)
+
+        html_parts.append(f'''
+        <div style="margin-bottom: 12px;">
+            <div style="display: flex; justify-content: space-between; margin-bottom: 4px;">
+                <span style="font-weight: 500;">{label}</span>
+                <span style="font-weight: 600; color: {color};">{pct}%</span>
+            </div>
+            <div style="background: #e0e0e0; border-radius: 4px; height: 20px; overflow: hidden;">
+                <div style="background: {color}; width: {pct}%; height: 100%; border-radius: 4px; transition: width 0.3s;"></div>
+            </div>
+        </div>
+        ''')
+
+    html_parts.append('</div>')
+    return "".join(html_parts)
 
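The new formatter reads only `analysis.view_weights`, so it can be smoke-tested without loading a model. A minimal sketch follows; the `FakeAnalysis` stub is hypothetical (the real `ViewAnalysis` also carries branch weights and Grad-CAM heatmaps), and the import assumes the Space's repo layout:

```python
# Hypothetical smoke test for format_view_weights_html (not part of the commit).
from dataclasses import dataclass, field

from app.visualization import format_view_weights_html  # assumes the Space's repo layout


@dataclass
class FakeAnalysis:
    # Stand-in for ViewAnalysis; only view_weights is read by the formatter.
    view_weights: dict = field(default_factory=dict)


analysis = FakeAnalysis(view_weights={"whole": 0.52, "face": 0.31, "eyes": 0.17})
html = format_view_weights_html(analysis)
assert "View Contribution" in html and "52%" in html
```
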
webui_gradio.py CHANGED
@@ -166,7 +166,7 @@ _patch_gradio_client_bool_jsonschema()
 from app.model_io import LoadedModel, embed_triview, load_style_model
 from app.proto_db import PrototypeDB, load_prototype_db, topk_predictions_unique_labels
 from app.view_extractor import AnimeFaceEyeExtractor, ExtractorCfg
-from app.visualization import ViewAnalysis, analyze_views, format_analysis_text
+from app.visualization import ViewAnalysis, analyze_views, format_view_weights_html
 
 
 ROOT = Path(__file__).resolve().parent
@@ -268,16 +268,19 @@ def load_all(ckpt_path: str, proto_path: str, device: str) -> str:
     return f"✅ Loaded checkpoint `{Path(ckpt_path).name}` (stage={lm.stage_i}) and proto DB `{Path(proto_path).name}` (N={db.centers.shape[0]})"
 
 
-def classify(
+def classify_and_analyze(
     whole_img,
     topk: int,
 ):
     """
-    Classify using auto-extracted face/eyes from whole image.
-    Returns: status, table_rows, face_preview, eyes_preview
+    Classify and analyze an image in one pass.
+    Returns: status, table_rows, view_weights_html,
+             gcam_whole, gcam_face, gcam_eye, face_preview, eye_preview
     """
+    empty_result = ("", [], "", None, None, None, None, None)
+
     if APP_STATE.lm is None or APP_STATE.db is None:
-        return "❌ Click **Load** first.", [], None, None
+        return ("❌ Click **Load** first.",) + empty_result[1:]
 
     lm = APP_STATE.lm
     db = APP_STATE.db
@@ -292,88 +292,48 @@ def classify(
 
     w = _to_pil(whole_img)
     if w is None:
-        return "❌ Provide a whole image.", [], None, None
+        return ("❌ Provide a whole image.",) + empty_result[1:]
 
     try:
+        # Extract face and eye region
         face_pil = None
-        eyes_pil = None
+        eye_pil = None
         if ex is not None:
             rgb = np.array(w.convert("RGB"))
-            face_rgb, eyes_rgb = ex.extract(rgb)
+            face_rgb, eye_rgb = ex.extract(rgb)
             if face_rgb is not None:
                 face_pil = Image.fromarray(face_rgb)
-            if eyes_rgb is not None:
-                eyes_pil = Image.fromarray(eyes_rgb)
+            if eye_rgb is not None:
+                eye_pil = Image.fromarray(eye_rgb)
 
+        # Prepare tensors
         wt = _pil_to_tensor(w, lm.T_w)
         ft = _pil_to_tensor(face_pil, lm.T_f) if face_pil is not None else None
-        et = _pil_to_tensor(eyes_pil, lm.T_e) if eyes_pil is not None else None
+        et = _pil_to_tensor(eye_pil, lm.T_e) if eye_pil is not None else None
+
+        # Classification
         z = embed_triview(lm, whole=wt, face=ft, eyes=et)
         preds = topk_predictions_unique_labels(db, z, topk=int(topk))
-    except Exception as ex:
-        return f"❌ Inference failed: {ex}", [], None, None
-
-    rows = [[name, float(score)] for (name, score) in preds]
-    return "✅ OK", rows, (face_pil if "face_pil" in locals() else None), (eyes_pil if "eyes_pil" in locals() else None)
-
-
-def analyze_image(whole_img):
-    """
-    Analyze an image showing view weights, branch weights, and Grad-CAM.
-    Returns: status, analysis_text, whole_gradcam, face_gradcam, eyes_gradcam, face_preview, eyes_preview
-    """
-    if APP_STATE.lm is None:
-        return "❌ Click **Load** first.", "", None, None, None, None, None
-
-    lm = APP_STATE.lm
-    ex = APP_STATE.extractor
-
-    def _to_pil(x):
-        if x is None:
-            return None
-        if isinstance(x, Image.Image):
-            return x
-        return Image.fromarray(x)
-
-    w = _to_pil(whole_img)
-    if w is None:
-        return "❌ Provide a whole image.", "", None, None, None, None, None
-
-    try:
-        # Extract face and eyes
-        face_pil = None
-        eyes_pil = None
-        if ex is not None:
-            rgb = np.array(w.convert("RGB"))
-            face_rgb, eyes_rgb = ex.extract(rgb)
-            if face_rgb is not None:
-                face_pil = Image.fromarray(face_rgb)
-            if eyes_rgb is not None:
-                eyes_pil = Image.fromarray(eyes_rgb)
-
-        # Prepare tensors
-        wt = _pil_to_tensor(w, lm.T_w)
-        ft = _pil_to_tensor(face_pil, lm.T_f) if face_pil is not None else None
-        et = _pil_to_tensor(eyes_pil, lm.T_e) if eyes_pil is not None else None
+        rows = [[name, float(score)] for (name, score) in preds]
 
+        # Analysis (Grad-CAM + view weights)
         views = {"whole": wt, "face": ft, "eyes": et}
-        original_images = {"whole": w, "face": face_pil, "eyes": eyes_pil}
-
-        # Run analysis
+        original_images = {"whole": w, "face": face_pil, "eyes": eye_pil}
         analysis = analyze_views(lm.model, views, original_images, lm.device)
-        analysis_text = format_analysis_text(analysis)
+        view_weights_html = format_view_weights_html(analysis)
 
         return (
-            "✅ Analysis complete",
-            analysis_text,
+            "✅ Done",
+            rows,
+            view_weights_html,
             analysis.gradcam_heatmaps.get("whole"),
             analysis.gradcam_heatmaps.get("face"),
             analysis.gradcam_heatmaps.get("eyes"),
             face_pil,
-            eyes_pil,
+            eye_pil,
         )
     except Exception as e:
-        return f"❌ Analysis failed: {e}", "", None, None, None, None, None
+        return (f"❌ Failed: {e}",) + empty_result[1:]
 
 
 def _gallery_item_to_pil(item) -> Optional[Image.Image]:
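Note the shape discipline in the merged handler: Gradio delivers one value per declared output component, so every exit path pads to the same 8-tuple via `empty_result`. A minimal sketch of that pattern with stub logic (not the Space's real handler):

```python
# Fixed-arity return pattern used by classify_and_analyze (stub logic only).
EMPTY = ("", [], "", None, None, None, None, None)  # status, rows, html, then 5 images

def handler(img, topk: int):
    if img is None:
        # Error exits reuse the empty tail so the output arity never changes.
        return ("❌ Provide a whole image.",) + EMPTY[1:]
    try:
        rows = [["artist_a", 0.92], ["artist_b", 0.88]][: int(topk)]
        return ("✅ Done", rows, "<div>bars</div>", None, None, None, None, None)
    except Exception as e:
        return (f"❌ Failed: {e}",) + EMPTY[1:]

# All paths agree on arity:
assert len(handler(None, 5)) == len(handler(object(), 5)) == 8
```
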
@@ -569,46 +532,38 @@ def build_ui() -> gr.Blocks:
 
         with gr.Tab("Classify"):
             with gr.Row():
-                whole = gr.Image(label="Whole image (required)", type="pil")
-                face_prev = gr.Image(label="Extracted face (auto)", type="pil")
-                eyes_prev = gr.Image(label="Extracted eyes (auto)", type="pil")
-            with gr.Row():
-                topk = gr.Slider(1, 20, value=5, step=1, label="Top-K")
-                run_btn = gr.Button("Run", variant="primary")
-
-            out_status = gr.Markdown("")
-            table = gr.Dataframe(headers=["label", "cosine_sim"], datatype=["str", "number"], interactive=False)
-            run_btn.click(classify, inputs=[whole, topk], outputs=[out_status, table, face_prev, eyes_prev])
-
-        with gr.Tab("Analyze (Grad-CAM)"):
-            gr.Markdown(
-                "### 🔍 View & Branch Analysis with Grad-CAM\n"
-                "Visualize which parts of the image and which style features the model focuses on.\n"
-                "- **View weights**: How much each view (whole/face/eyes) contributed\n"
-                "- **Branch weights**: Which style features (Gram/Cov/Spectrum/Stats) were important\n"
-                "- **Grad-CAM**: Spatial attention heatmaps showing where the model looked"
-            )
-            with gr.Row():
-                analyze_input = gr.Image(label="Whole image", type="pil")
-            analyze_btn = gr.Button("Analyze", variant="primary")
-            analyze_status = gr.Markdown("")
-            analyze_text = gr.Markdown("")
-
-            gr.Markdown("### Grad-CAM Heatmaps")
+                with gr.Column(scale=1):
+                    whole = gr.Image(label="Upload image", type="pil")
+                    with gr.Row():
+                        topk = gr.Slider(1, 20, value=5, step=1, label="Top-K")
+                        run_btn = gr.Button("Run", variant="primary")
+                    out_status = gr.Markdown("")
+
+                with gr.Column(scale=1):
+                    view_weights_display = gr.HTML(label="View Contribution")
+
+            # Classification results
+            gr.Markdown("### 🎯 Classification Results")
+            table = gr.Dataframe(headers=["Artist", "Similarity"], datatype=["str", "number"], interactive=False)
+
+            # Grad-CAM heatmaps
+            gr.Markdown("### 🔥 Grad-CAM Attention Maps")
+            gr.Markdown("*Where the model focused in each view:*")
             with gr.Row():
-                gcam_whole = gr.Image(label="Whole (Grad-CAM)", type="pil")
-                gcam_face = gr.Image(label="Face (Grad-CAM)", type="pil")
-                gcam_eyes = gr.Image(label="Eyes (Grad-CAM)", type="pil")
+                gcam_whole = gr.Image(label="Whole Image", type="pil")
+                gcam_face = gr.Image(label="Face", type="pil")
+                gcam_eye = gr.Image(label="Eye Region", type="pil")
 
-            gr.Markdown("### Extracted Views")
+            # Extracted views
+            gr.Markdown("### 👁️ Auto-Extracted Views")
             with gr.Row():
-                analyze_face = gr.Image(label="Extracted Face", type="pil")
-                analyze_eyes = gr.Image(label="Extracted Eyes", type="pil")
+                face_prev = gr.Image(label="Detected Face", type="pil")
+                eye_prev = gr.Image(label="Detected Eye", type="pil")
 
-            analyze_btn.click(
-                analyze_image,
-                inputs=[analyze_input],
-                outputs=[analyze_status, analyze_text, gcam_whole, gcam_face, gcam_eyes, analyze_face, analyze_eyes],
+            run_btn.click(
+                classify_and_analyze,
+                inputs=[whole, topk],
+                outputs=[out_status, table, view_weights_display, gcam_whole, gcam_face, gcam_eye, face_prev, eye_prev],
             )
 
         with gr.Tab("Add prototype (temporary)"):
@@ -616,8 +571,8 @@ def build_ui() -> gr.Blocks:
                 "### ⚠️ Temporary Prototypes Only\n"
                 "Add prototypes using random triplet combinations and K-means clustering (same as eval process).\n"
                 "1. Upload multiple whole images\n"
-                "2. Face/eyes are auto-extracted from each\n"
-                "3. Random triplets (whole + face + eyes) are created\n"
+                "2. Face and eye region are auto-extracted from each\n"
+                "3. Random triplets (whole + face + eye) are created\n"
                 "4. K-means clustering creates K prototype centers\n\n"
                 "**These prototypes are session-only** — lost when the Space restarts."
             )
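With classify and analyze combined, the Classify tab drives all eight outputs from a single click handler. A runnable sketch of the same wiring; component names mirror the diff, but the handler body is a stand-in rather than the Space's real inference code:

```python
# Standalone wiring sketch (hypothetical handler; layout mirrors the diff).
import gradio as gr

def classify_and_analyze(img, topk):
    status = "✅ Done" if img is not None else "❌ Provide a whole image."
    rows = [["artist_a", 0.92]]  # placeholder predictions
    bars = "<div>view-contribution bars would render here</div>"
    # One value per output component, in the order declared below.
    return status, rows, bars, img, img, img, img, img

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            whole = gr.Image(label="Upload image", type="pil")
            topk = gr.Slider(1, 20, value=5, step=1, label="Top-K")
            run_btn = gr.Button("Run", variant="primary")
            out_status = gr.Markdown("")
        with gr.Column(scale=1):
            view_weights_display = gr.HTML(label="View Contribution")
    table = gr.Dataframe(headers=["Artist", "Similarity"], datatype=["str", "number"], interactive=False)
    with gr.Row():
        gcam_whole = gr.Image(label="Whole Image", type="pil")
        gcam_face = gr.Image(label="Face", type="pil")
        gcam_eye = gr.Image(label="Eye Region", type="pil")
    with gr.Row():
        face_prev = gr.Image(label="Detected Face", type="pil")
        eye_prev = gr.Image(label="Detected Eye", type="pil")
    run_btn.click(
        classify_and_analyze,
        inputs=[whole, topk],
        outputs=[out_status, table, view_weights_display, gcam_whole, gcam_face, gcam_eye, face_prev, eye_prev],
    )

if __name__ == "__main__":
    demo.launch()
```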