Spaces:
Running
Running
| <!doctype html> | |
| <!-- The doctype above keeps browsers in standards mode; without it the page is laid out in quirks mode. --> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Meta Leaderboard - Top 20 Models</title> | |
| <!-- Link to external stylesheet would be best practice, but keeping it internal for this example --> | |
| <style> | |
| /* | |
| * ================================================================================================= | |
| * 1. SETUP & CUSTOM PROPERTIES (VARIABLES) | |
| * Using variables makes the design system consistent and easy to update. | |
| * Change a color or font here, and it updates everywhere. | |
| * ================================================================================================= | |
| */ | |
| /* Design tokens: the colour, gradient, font, spacing and timing values that the rules below read through var(). */ | |
| :root { | |
| /* Color Palette */ | |
| --color-background: #0a0a0a; | |
| --color-surface: #121212; | |
| --color-text-primary: #f0f0f0; | |
| --color-text-secondary: #a0a0a0; | |
| --color-border: #2a2a2a; | |
| /* Both accent stops are transparent, so --gradient-accent paints nothing and whatever sits behind the element shows through. */ | |
| --color-accent-start: transparent; | |
| --color-accent-end: transparent; | |
| --color-success: #48bb78; | |
| --color-info: #4299e1; | |
| --color-warning: #ed8936; | |
| --color-danger: #f56565; | |
| /* Gradients */ | |
| --gradient-accent: linear-gradient(135deg, var(--color-accent-start) 0%, var(--color-accent-end) 100%); | |
| /* NOTE(review): --gradient-accent-hover is not referenced by any rule visible in this stylesheet - confirm before removing. */ | |
| --gradient-accent-hover: linear-gradient(90deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); | |
| /* The four status gradients below colour both the rating badges and the legend swatches. */ | |
| --gradient-success: linear-gradient(135deg, var(--color-success) 0%, #38a169 100%); | |
| --gradient-info: linear-gradient(135deg, var(--color-info) 0%, #3182ce 100%); | |
| --gradient-warning: linear-gradient(135deg, var(--color-warning) 0%, #dd6b20 100%); | |
| --gradient-danger: linear-gradient(135deg, var(--color-danger) 0%, #e53e3e 100%); | |
| /* Typography */ | |
| --font-family-sans: 'Inter', system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; | |
| --font-size-base: 16px; | |
| --font-weight-normal: 400; | |
| --font-weight-medium: 500; | |
| --font-weight-semibold: 600; | |
| --font-weight-bold: 700; | |
| /* Spacing & Sizing (pixel equivalents assume a 16px root font size) */ | |
| --spacing-xs: 0.25rem; /* 4px */ | |
| --spacing-sm: 0.5rem; /* 8px */ | |
| --spacing-md: 1rem; /* 16px */ | |
| --spacing-lg: 1.5rem; /* 24px */ | |
| --spacing-xl: 2.5rem; /* 40px */ | |
| --border-radius: 16px; | |
| --container-width: 1200px; | |
| /* Transitions & Shadows */ | |
| --transition-duration: 0.3s; | |
| --shadow-lg: 0 20px 60px rgba(0, 0, 0, 0.4); | |
| } | |
| /* | |
| * ================================================================================================= | |
| * 2. GLOBAL STYLES & RESETS | |
| * A more modern reset and base styles. | |
| * ================================================================================================= | |
| */ | |
| /* Universal reset: border-box sizing, no default margins or padding, and text selection switched off for every element. */ | |
| *, | |
| *::before, | |
| *::after { | |
|   box-sizing: border-box; | |
|   margin: 0; | |
|   padding: 0; | |
|   user-select: none; | |
| } | |
| /* Animate jumps to in-page anchors instead of snapping. */ | |
| html { scroll-behavior: smooth; } | |
| /* Page canvas: near-black base colour with a lighter radial glow from the top edge, filling at least the viewport height. */ | |
| body { | |
|   min-height: 100vh; | |
|   padding: var(--spacing-xl) var(--spacing-md); | |
|   font-family: var(--font-family-sans); | |
|   color: var(--color-text-primary); | |
|   background-color: var(--color-background); | |
|   background-image: radial-gradient(circle at top, #1a1a1a, var(--color-background)); | |
| } | |
| /* Centred content column, capped at the container width token. */ | |
| .container { | |
|   margin: 0 auto; | |
|   max-width: var(--container-width); | |
| } | |
| /* | |
| * ================================================================================================= | |
| * 3. TYPOGRAPHY | |
| * Centralized text styles for headings and paragraphs. | |
| * ================================================================================================= | |
| */ | |
| /* Page title: centred, shown in title case, sized fluidly between 1.8rem and 2.5rem as the viewport widens. */ | |
| h1 { | |
|   margin-bottom: var(--spacing-sm); | |
|   font-size: clamp(1.8rem, 5vw, 2.5rem); | |
|   color: var(--color-text-primary); | |
|   text-align: center; | |
|   text-transform: capitalize; | |
|   text-shadow: 2px 2px 4px rgba(0,0,0,0.2); | |
| } | |
| /* Wrapper for the tagline and the meta line beneath the title. */ | |
| .subtitle-group { | |
|   margin-bottom: var(--spacing-xl); | |
|   text-align: center; | |
| } | |
| /* Tagline in the secondary text colour. */ | |
| .subtitle { | |
|   font-size: 1.1rem; | |
|   line-height: 1.5; | |
|   color: var(--color-text-secondary); | |
| } | |
| /* Smaller, dimmer line under the tagline. */ | |
| .subtitle-meta { | |
|   margin-top: var(--spacing-xs); | |
|   font-size: 0.9rem; | |
|   color: #555; | |
| } | |
| /* | |
| * ================================================================================================= | |
| * 4. TABLE STYLES | |
| * The main leaderboard component. | |
| * ================================================================================================= | |
| */ | |
| /* Card around the table: rounded surface with border and drop shadow, clipped to its corners, fading in on load. */ | |
| .table-wrapper { | |
|   border: 1px solid var(--color-border); | |
|   border-radius: var(--border-radius); | |
|   background: var(--color-surface); | |
|   box-shadow: var(--shadow-lg); | |
|   overflow: hidden; | |
|   animation: fadeIn 0.6s ease-out; | |
| } | |
| table { | |
|   width: 100%; | |
|   text-align: left; | |
|   border-collapse: collapse; | |
| } | |
| /* Header band. NOTE(review): .table-wrapper sets overflow, which makes it the sticky container, so this header presumably sticks only within the wrapper and not while the page scrolls - confirm in a browser. */ | |
| thead { | |
|   position: sticky; | |
|   top: 0; | |
|   z-index: 10; | |
|   color: white; | |
|   background: var(--gradient-accent); | |
| } | |
| /* Column headings: small, uppercase, centred, and not selectable. */ | |
| th { | |
|   padding: var(--spacing-md); | |
|   font-size: 0.9rem; | |
|   font-weight: var(--font-weight-semibold); | |
|   letter-spacing: 0.5px; | |
|   text-align: center; | |
|   text-transform: uppercase; | |
|   user-select: none; /* keeps header labels out of accidental drag selections */ | |
| } | |
| /* First and last columns (rank and rating) are centred in body cells as well as headings. */ | |
| th:first-child, | |
| th:last-child, | |
| td:first-child, | |
| td:last-child { | |
|   text-align: center; | |
| } | |
| /* Body rows: hairline divider, with only the two properties that change on hover being transitioned. */ | |
| tbody tr { | |
|   border-bottom: 1px solid var(--color-border); | |
|   transition: background-color var(--transition-duration) ease, transform var(--transition-duration) ease; | |
| } | |
| /* Hover feedback: faint highlight and a 2px upward lift. */ | |
| tbody tr:hover { | |
|   transform: translateY(-2px); | |
|   background-color: rgba(255, 255, 255, 0.03); | |
| } | |
| /* The final row needs no divider against the card edge. */ | |
| tbody tr:last-child { | |
|   border-bottom: none; | |
| } | |
| td { | |
|   padding: var(--spacing-lg) var(--spacing-md); | |
|   font-size: 0.95rem; | |
|   vertical-align: middle; /* keeps short cells centred beside cells whose text wraps */ | |
| } | |
| /* Rank number: bold glyphs filled by clipping a white background to the text. The background shorthand must stay ahead of background-clip because the shorthand resets it. */ | |
| .rank { | |
|   padding-right: var(--spacing-sm); | |
|   font-size: 1.1rem; | |
|   font-weight: var(--font-weight-bold); | |
|   background: white; | |
|   background-clip: text; | |
|   -webkit-background-clip: text; | |
|   -webkit-text-fill-color: transparent; | |
| } | |
| /* Model identifier, centred in its column. */ | |
| .model-name { | |
|   font-size: 1rem; | |
|   font-weight: var(--font-weight-semibold); | |
|   color: var(--color-text-primary); | |
|   text-align: center; | |
| } | |
| /* Numeric score label shown above the bar. */ | |
| .score { | |
|   font-size: 0.9rem; | |
|   font-weight: var(--font-weight-bold); | |
|   color: var(--color-text-secondary); | |
|   text-align: center; | |
| } | |
| /* Bar track: full-width pill that clips the fill to its rounded ends. */ | |
| .progress-container { | |
|   margin-top: var(--spacing-sm); | |
|   width: 100%; | |
|   height: 8px; | |
|   border-radius: 99px; | |
|   background: #333; | |
|   overflow: hidden; | |
| } | |
| /* Bar fill: grows from zero to its own width with an ease-out style curve. */ | |
| .progress-bar { | |
|   height: 100%; | |
|   border-radius: 99px; | |
|   background: white; | |
|   animation: fillBar 1.5s cubic-bezier(0.25, 1, 0.5, 1); | |
| } | |
| /* Notes column: compact text with comfortable line spacing. */ | |
| .performance-notes { | |
|   font-size: 0.85rem; | |
|   line-height: 1.5; | |
| } | |
| /* Strength lines in the success colour, weakness lines in the danger colour, both at medium weight. */ | |
| .strengths { | |
|   font-weight: var(--font-weight-medium); | |
|   color: var(--color-success); | |
| } | |
| .weaknesses { | |
|   font-weight: var(--font-weight-medium); | |
|   color: var(--color-danger); | |
| } | |
| /* | |
| * ================================================================================================= | |
| * 5. COMPONENTS (Badges & Legend) | |
| * Reusable component styles. | |
| * ================================================================================================= | |
| */ | |
| /* IMPROVEMENT: DRY principle. A base class for all badges. */ | |
| /* Pill label shared by every rating badge; the modifier classes below supply only the fill. */ | |
| .badge { | |
|   display: inline-block; | |
|   padding: var(--spacing-xs) var(--spacing-md); | |
|   border-radius: 99px; | |
|   color: white; | |
|   font-size: 0.75rem; | |
|   font-weight: var(--font-weight-semibold); | |
|   letter-spacing: 0.5px; | |
|   text-transform: uppercase; | |
|   white-space: nowrap; | |
| } | |
| /* One fill per rating tier. */ | |
| .badge--excellent { | |
|   background: var(--gradient-success); | |
| } | |
| .badge--good { | |
|   background: var(--gradient-info); | |
| } | |
| .badge--average { | |
|   background: var(--gradient-warning); | |
| } | |
| .badge--poor { | |
|   background: var(--gradient-danger); | |
| } | |
| /* Colour key under the table: a centred row of entries that wraps when space runs out. */ | |
| .legend { | |
|   display: flex; | |
|   flex-wrap: wrap; | |
|   justify-content: center; | |
|   gap: var(--spacing-lg); | |
|   margin-top: var(--spacing-xl); | |
| } | |
| /* A single entry: swatch and label on one line. */ | |
| .legend-item { | |
|   display: flex; | |
|   align-items: center; | |
|   gap: var(--spacing-sm); | |
|   font-size: 0.9rem; | |
| } | |
| /* Swatch bar; its fill is supplied by the element itself. */ | |
| .legend-color { | |
|   width: 30px; | |
|   height: 8px; | |
|   border-radius: 4px; | |
| } | |
| /* | |
| * ================================================================================================= | |
| * 6. ANIMATIONS & MEDIA QUERIES | |
| * ================================================================================================= | |
| */ | |
| /* Entrance animation: fade in while sliding up 20px. Used by .table-wrapper and by the table rows built in the script. */ | |
| @keyframes fadeIn { | |
| from { opacity: 0; transform: translateY(20px); } | |
| to { opacity: 1; transform: translateY(0); } | |
| } | |
| /* Only the start is given; the end is each bar's own width, which the row-building script sets inline. */ | |
| @keyframes fillBar { | |
| from { width: 0; } | |
| } | |
| /* Responsive adjustments for viewports up to 768px wide */ | |
| @media (max-width: 768px) { | |
| /* Tighter horizontal page padding and smaller heading and subtitle text. */ | |
| body { padding: var(--spacing-xl) var(--spacing-sm); } | |
| h1 { font-size: clamp(1.6rem, 7vw, 2rem); } | |
| .subtitle { font-size: 1rem; } | |
| /* Denser table cells. */ | |
| th, td { | |
| padding: var(--spacing-md) var(--spacing-sm); | |
| font-size: 0.85rem; | |
| } | |
| .table-wrapper { | |
| /* On mobile, allow the table itself to scroll horizontally */ | |
| overflow-x: auto; | |
| } | |
| table { | |
| /* Ensure table has a minimum width to make scrolling meaningful */ | |
| min-width: 600px; | |
| } | |
| /* Stack the legend entries vertically instead of in a wrapping row. */ | |
| .legend { | |
| flex-direction: column; | |
| align-items: center; | |
| gap: var(--spacing-md); | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <!-- Page heading; the h1 rule's text-transform: capitalize displays it in title case. --> | |
| <h1>meta arena</h1> | |
| <!-- Tagline plus a running count. The wrapper is a <div> because <p> elements cannot be nested. --> | |
| <div class="subtitle-group"> | |
| <p class="subtitle">A few-shot test at small scale</p> | |
| <p class="subtitle-meta">60+ models tested so far and counting...</p> | |
| </div> | |
| <!-- Leaderboard card. The header row is static; body rows are generated by populateTable() in the script at the end of <body>. --> | |
| <div class="table-wrapper"> | |
|   <table id="performanceTable"> | |
|     <thead> | |
|       <tr> | |
|         <!-- scope="col" tells assistive technology that each heading labels the column beneath it. --> | |
|         <th scope="col">Rank</th> | |
|         <th scope="col">Model</th> | |
|         <th scope="col">Score</th> | |
|         <th scope="col">Performance Notes</th> | |
|         <th scope="col">Rating</th> | |
|       </tr> | |
|     </thead> | |
|     <tbody> | |
|       <!-- Filled at runtime; intentionally empty in the markup. --> | |
|     </tbody> | |
|   </table> | |
| </div> | |
| <!-- Colour key for the Rating column. Each swatch takes its fill from a gradient custom property through an inline style. --> | |
| <!-- Scores carry half points, so getRatingBadge() treats each band as running up to, but not including, the next band's lower bound. --> | |
| <div class="legend"> | |
|   <div class="legend-item"> | |
|     <div class="legend-color" style="background: var(--gradient-success);"></div> | |
|     <span>Excellent (108+)</span> | |
|   </div> | |
|   <div class="legend-item"> | |
|     <div class="legend-color" style="background: var(--gradient-info);"></div> | |
|     <span>Good (91-107)</span> | |
|   </div> | |
|   <div class="legend-item"> | |
|     <div class="legend-color" style="background: var(--gradient-warning);"></div> | |
|     <span>Average (75-90)</span> | |
|   </div> | |
|   <div class="legend-item"> | |
|     <div class="legend-color" style="background: var(--gradient-danger);"></div> | |
|     <!-- The less-than sign is written as an entity; a raw "<" in text is a parse error. --> | |
|     <span>Poor (&lt;75)</span> | |
|   </div> | |
| </div> | |
| </div> | |
| <script> | |
| // Leaderboard data, listed best score first. Each entry has: | |
| //   rank       - position shown in the Rank column | |
| //   name       - model identifier shown in the Model column | |
| //   score      - points earned, displayed against maxScore | |
| //   strengths  - free-text note rendered in the success colour | |
| //   weaknesses - free-text note rendered in the danger colour | |
| const models = [ | |
| { rank: 1, name: "granite-4.0-h-tiny", score: 103.5, strengths: "Extremely well-rounded; top-tier in logic, math, translation, and synonyms.", weaknesses: "Fails completely at rhyming; hallucinates facts in summarization tasks." }, | |
| { rank: 2, name: "Qwen3-4B-Instruct", score: 102, strengths: "Top performer, excels in core NLP, logic, and factual recall.", weaknesses: "Prone to factual hallucinations in summarization tasks." }, | |
| { rank: 3, name: "lfm2-8b", score: 99, strengths: "Very logical, provides detailed, nuanced answers, strong at misconception correction.", weaknesses: "Struggles with creative tasks like rhyming and procedural sequencing." }, | |
| { rank: 4, name: "Qwen3-MOE-4x0.6B-2.4B-Writing-Thunder-V1.2.Q8_0.gguf", score: 96, strengths: "Strong in logic, math, grammar, and summarization.", weaknesses: "Struggles with rhyming, synonyms, some translation, and procedural sequencing." }, | |
| { rank: 5, name: "granite-3.3-2b-instruct-Q8_0.gguf", score: 95, strengths: "Excels at core NLP, logic, math, and misconception correction.", weaknesses: "Fails completely at NER, rhyming, and procedural sequencing." }, | |
| { rank: 6, name: "granite-3.1-3b-instruct", score: 93.5, strengths: "Highly capable when it works; excellent at summarization and logic.", weaknesses: "Unreliable; frequently outputs junk characters ('{') instead of answering." }, | |
| { rank: 7, name: "lfm2-2.6b", score: 93.5, strengths: "Strong core capabilities, great at grammar and misconception correction.", weaknesses: "Significant weakness in analogy, rhyming, and sequencing tasks." }, | |
| { rank: 8, name: "EXAONE-3.5-2.4B-Instruct-abliterated.Q8_0.gguf", score: 93, strengths: "Excellent at reasoning, summarization, grammar, and misconception correction.", weaknesses: "Fails completely at translation and sequencing; unreliable output formatting." }, | |
| { rank: 9, name: "Qwen3-1.7B", score: 92.5, strengths: "Good overall performance on core tasks and math.", weaknesses: "Fails completely on rhyming and has some odd analogy mistakes." }, | |
| { rank: 10, name: "Llama-3.2-1B-Instruct", score: 92, strengths: "Great at core NLP, math, and code generation.", weaknesses: "Fails badly on misconception correction, sequencing, and paraphrasing." }, | |
| { rank: 11, name: "lfm2-1.2b", score: 90.5, strengths: "Strong core skills like grammar, math, and translation.", weaknesses: "Knowledge gaps (object location) and hallucinates facts in headlines." }, | |
| { rank: 12, name: "Falcon-H1-1.5B-Deep-Instruct", score: 89, strengths: "Excellent summarizer and paraphraser, strong on synonyms.", weaknesses: "Very poor at logical deduction, rhyming, and categorization." }, | |
| { rank: 13, name: "arco-3", score: 83, strengths: "Powerful 0.6b model; perfect at code gen, sentiment, math, and core knowledge.", weaknesses: "Fails completely at summarization (hallucinations), sequencing, and rhyming." }, | |
| { rank: 14, name: "Falcon-H1-1.5B-Instruct", score: 81, strengths: "Good at logic, math, and factual questions.", weaknesses: "Fails translation completely and often gives blank/junk answers." }, | |
| { rank: 15, name: "Llama-3.2-SUN-HDIC-1B-Instruct.Q8_0.gguf", score: 79, strengths: "Strong in synonyms, math, and factual recall; decent at core NLP.", weaknesses: "Complete failure at summarization and misconception correction; bad factual hallucinations." }, | |
| { rank: 16, name: "Piaget-0.6B.Q8_0.gguf", score: 78, strengths: "Excellent at core knowledge tasks: Sentiment, Object Location, Antonyms, Categorization, Math, Factual QA.", weaknesses: "Complete failure at Summarization, Sequencing, and Rhyming. Very poor at Grammar and Misconception Correction." }, | |
| { rank: 17, name: "lfm2-700m", score: 75.5, strengths: "Handles sentiment, math, and logic correctly.", weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar." }, | |
| { rank: 18, name: "granite-4.0-350m-q8_0.gguf", score: 75.0, strengths: "Perfect translation and math; strong vocabulary (synonyms/antonyms) and common-sense knowledge.", weaknesses: "Complete failure on grammar correction; severe factual hallucinations in summarization; weak sequencing." }, | |
| { rank: 19, name: "Qwen3-psychological-reasoning-0.6B.Q8_0.gguf", score: 73, strengths: "Excels at factual recall and classification (Sentiment, Object Location, Math, Factual QA, NER).", weaknesses: "Very poor at reasoning and creativity; complete failure in summarization, sequencing, and rhyming." }, | |
| { rank: 20, name: "qwen2.5-0.5b-instruct", score: 72, strengths: "Decent at math, basic commands, and some logic.", weaknesses: "Fails creative tasks (rhyming, synonyms) and suffers major headline hallucinations." } | |
| ]; | |
| // Denominator for the "score / max" label and for the progress-bar width percentage. | |
| const maxScore = 125; | |
| function getRatingBadge(score) { | |
| if (score >= 108) return '<span class="badge badge--excellent">Excellent</span>'; | |
| if (score >= 91) return '<span class="badge badge--good">Good</span>'; | |
| if (score >= 75) return '<span class="badge badge--average">Average</span>'; | |
| return '<span class="badge badge--poor">Poor</span>'; | |
| } | |
| function populateTable() { | |
| const tbody = document.querySelector('#performanceTable tbody'); | |
| const top20Models = models.slice(0, 20); | |
| top20Models.forEach((model, index) => { | |
| const percentage = (model.score / maxScore) * 100; | |
| const row = document.createElement('tr'); | |
| row.style.animation = `fadeIn 0.5s ease-out ${index * 0.05}s forwards`; | |
| row.style.opacity = 0; | |
| row.innerHTML = ` | |
| <td class="rank">#${model.rank}</td> | |
| <td><div class="model-name">${model.name}</div></td> | |
| <td> | |
| <div class="score">${model.score.toFixed(1)} / ${maxScore}</div> | |
| <div class="progress-container"> | |
| <div class="progress-bar" style="width: ${percentage}%"></div> | |
| </div> | |
| </td> | |
| <td class="performance-notes"> | |
| <div class="strengths">β ${model.strengths}</div> | |
| <div class="weaknesses">β ${model.weaknesses}</div> | |
| </td> | |
| <td>${getRatingBadge(model.score)}</td> | |
| `; | |
| tbody.appendChild(row); | |
| }); | |
| } | |
| document.addEventListener('DOMContentLoaded', populateTable); | |
| </script> | |
| </body> | |
| </html> |