<!--
meta-arena / index.html
appvoid's picture
Update index.html
ffe1dc5 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Meta Leaderboard - Top 20 Models</title>
<!-- Link to external stylesheet would be best practice, but keeping it internal for this example -->
<style>
/*
* =================================================================================================
* 1. SETUP & CUSTOM PROPERTIES (VARIABLES)
* Using variables makes the design system consistent and easy to update.
* Change a color or font here, and it updates everywhere.
* =================================================================================================
*/
:root {
/* Color Palette */
--color-background: #0a0a0a;
--color-surface: #121212;
--color-text-primary: #f0f0f0;
--color-text-secondary: #a0a0a0;
--color-border: #2a2a2a;
--color-accent-start: transparent;
--color-accent-end: transparent;
--color-success: #48bb78;
--color-info: #4299e1;
--color-warning: #ed8936;
--color-danger: #f56565;
/* Gradients */
--gradient-accent: linear-gradient(135deg, var(--color-accent-start) 0%, var(--color-accent-end) 100%);
--gradient-accent-hover: linear-gradient(90deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%);
--gradient-success: linear-gradient(135deg, var(--color-success) 0%, #38a169 100%);
--gradient-info: linear-gradient(135deg, var(--color-info) 0%, #3182ce 100%);
--gradient-warning: linear-gradient(135deg, var(--color-warning) 0%, #dd6b20 100%);
--gradient-danger: linear-gradient(135deg, var(--color-danger) 0%, #e53e3e 100%);
/* Typography */
--font-family-sans: 'Inter', system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
--font-size-base: 16px;
--font-weight-normal: 400;
--font-weight-medium: 500;
--font-weight-semibold: 600;
--font-weight-bold: 700;
/* Spacing & Sizing */
--spacing-xs: 0.25rem; /* 4px */
--spacing-sm: 0.5rem; /* 8px */
--spacing-md: 1rem; /* 16px */
--spacing-lg: 1.5rem; /* 24px */
--spacing-xl: 2.5rem; /* 40px */
--border-radius: 16px;
--container-width: 1200px;
/* Transitions & Shadows */
--transition-duration: 0.3s;
--shadow-lg: 0 20px 60px rgba(0, 0, 0, 0.4);
}
/*
* =================================================================================================
* 2. GLOBAL STYLES & RESETS
* A more modern reset and base styles.
* =================================================================================================
*/
/*
 * Box-model reset applied to every element and its pseudo-elements.
 * Text selection is deliberately left enabled here so visitors can select and
 * copy model names, scores and notes; only the table header cells opt out
 * (see the `th` rule).
 */
*, *::before, *::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}
html {
scroll-behavior: smooth;
}
body {
font-family: var(--font-family-sans);
background-color: var(--color-background);
/* A subtle gradient background is often more pleasing than solid black */
background-image: radial-gradient(circle at top, #1a1a1a, var(--color-background));
color: var(--color-text-primary);
min-height: 100vh;
padding: var(--spacing-xl) var(--spacing-md);
}
.container {
max-width: var(--container-width);
margin: 0 auto;
}
/*
* =================================================================================================
* 3. TYPOGRAPHY
* Centralized text styles for headings and paragraphs.
* =================================================================================================
*/
h1 {
text-align: center;
color: var(--color-text-primary);
/* IMPROVEMENT: Fluid typography. Font size smoothly scales with the viewport width. */
font-size: clamp(1.8rem, 5vw, 2.5rem);
margin-bottom: var(--spacing-sm);
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
text-transform: capitalize;
}
.subtitle-group {
text-align: center;
margin-bottom: var(--spacing-xl);
}
.subtitle {
color: var(--color-text-secondary);
font-size: 1.1rem;
line-height: 1.5;
}
.subtitle-meta {
color: #555;
font-size: 0.9rem;
margin-top: var(--spacing-xs);
}
/*
* =================================================================================================
* 4. TABLE STYLES
* The main leaderboard component.
* =================================================================================================
*/
.table-wrapper {
background: var(--color-surface);
border-radius: var(--border-radius);
border: 1px solid var(--color-border);
box-shadow: var(--shadow-lg);
overflow: hidden;
animation: fadeIn 0.6s ease-out;
}
table {
width: 100%;
border-collapse: collapse;
text-align: left;
}
thead {
background: var(--gradient-accent);
color: white;
/* IMPROVEMENT: Sticky header for better UX on scrollable tables (mobile) */
position: sticky;
top: 0;
z-index: 10;
}
th {
padding: var(--spacing-md);
font-weight: var(--font-weight-semibold);
font-size: 0.9rem;
text-transform: uppercase;
letter-spacing: 0.5px;
text-align: center;
user-select: none; /* Good place for this, prevents selecting header text by accident */
}
th:first-child, td:first-child { text-align: center; }
th:last-child, td:last-child { text-align: center; }
tbody tr {
border-bottom: 1px solid var(--color-border);
/* IMPROVEMENT: Transition specific properties for better performance than 'all' */
transition: background-color var(--transition-duration) ease, transform var(--transition-duration) ease;
}
tbody tr:hover {
background-color: rgba(255, 255, 255, 0.03);
/* IMPROVEMENT: 'translateY' is often smoother and less jarring than 'scale' */
transform: translateY(-2px);
}
tbody tr:last-child {
border-bottom: none;
}
td {
padding: var(--spacing-lg) var(--spacing-md);
font-size: 0.95rem;
vertical-align: middle; /* Ensures content aligns nicely if it wraps */
}
.rank {
font-weight: var(--font-weight-bold);
font-size: 1.1rem;
background: white;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
padding-right: var(--spacing-sm);
}
.model-name {
text-align: center;
font-weight: var(--font-weight-semibold);
color: var(--color-text-primary);
font-size: 1rem;
}
.score {
font-weight: var(--font-weight-bold);
font-size: 0.9rem;
text-align: center;
color: var(--color-text-secondary);
}
.progress-container {
width: 100%;
height: 8px;
background: #333;
border-radius: 99px;
overflow: hidden;
margin-top: var(--spacing-sm);
}
.progress-bar {
height: 100%;
background: white;
border-radius: 99px;
animation: fillBar 1.5s cubic-bezier(0.25, 1, 0.5, 1); /* Use a more dynamic easing function */
}
.performance-notes { font-size: 0.85rem; line-height: 1.5; }
/* IMPROVEMENT: DRY principle. Shared styles for strengths/weaknesses. */
.strengths, .weaknesses {
font-weight: var(--font-weight-medium);
}
.strengths { color: var(--color-success); }
.weaknesses { color: var(--color-danger); }
/*
* =================================================================================================
* 5. COMPONENTS (Badges & Legend)
* Reusable component styles.
* =================================================================================================
*/
/* IMPROVEMENT: DRY principle. A base class for all badges. */
.badge {
display: inline-block;
padding: var(--spacing-xs) var(--spacing-md);
border-radius: 99px;
font-size: 0.75rem;
font-weight: var(--font-weight-semibold);
text-transform: uppercase;
letter-spacing: 0.5px;
color: white;
white-space: nowrap;
}
/* Modifier classes for colors */
.badge--excellent { background: var(--gradient-success); }
.badge--good { background: var(--gradient-info); }
.badge--average { background: var(--gradient-warning); }
.badge--poor { background: var(--gradient-danger); }
.legend {
display: flex;
justify-content: center;
gap: var(--spacing-lg);
margin-top: var(--spacing-xl);
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: var(--spacing-sm);
font-size: 0.9rem;
}
.legend-color {
width: 30px;
height: 8px;
border-radius: 4px;
}
/*
* =================================================================================================
* 6. ANIMATIONS & MEDIA QUERIES
* =================================================================================================
*/
/* Entrance animation: fade in while sliding up 20px. Used by .table-wrapper and by the JS-generated table rows. */
@keyframes fadeIn {
  from { opacity: 0; transform: translateY(20px); }
  to { opacity: 1; transform: translateY(0); }
}
/* Progress bars grow from zero to the width set inline on each bar (no `to` frame needed). */
@keyframes fillBar {
  from { width: 0; }
}
/*
 * Honour the operating system's "reduce motion" preference.
 * Durations are shortened instead of using `animation: none` because the
 * JS-generated rows start at inline `opacity: 0` and only become visible once
 * their fadeIn animation reaches its final (forwards-filled) frame.
 * `!important` is required to override the inline `animation` shorthand on those rows.
 */
@media (prefers-reduced-motion: reduce) {
  html { scroll-behavior: auto; }
  *, *::before, *::after {
    animation-duration: 0.01ms !important;
    animation-delay: 0s !important;
    transition-duration: 0.01ms !important;
  }
}
/* Responsive adjustments */
@media (max-width: 768px) {
body { padding: var(--spacing-xl) var(--spacing-sm); }
h1 { font-size: clamp(1.6rem, 7vw, 2rem); }
.subtitle { font-size: 1rem; }
th, td {
padding: var(--spacing-md) var(--spacing-sm);
font-size: 0.85rem;
}
.table-wrapper {
/* On mobile, allow the table itself to scroll horizontally */
overflow-x: auto;
}
table {
/* Ensure table has a minimum width to make scrolling meaningful */
min-width: 600px;
}
.legend {
flex-direction: column;
align-items: center;
gap: var(--spacing-md);
}
}
</style>
</head>
<body>
<div class="container">
<h1>meta arena</h1>
<!-- FIXED: A <p> cannot be nested inside another <p>. Using a div wrapper is valid. -->
<div class="subtitle-group">
<p class="subtitle">A few-shot test at small scale</p>
<p class="subtitle-meta">60+ models tested so far and counting...</p>
</div>
<div class="table-wrapper">
<table id="performanceTable">
  <!-- scope="col" explicitly associates each header cell with its column for assistive technology -->
  <thead>
    <tr>
      <th scope="col">Rank</th>
      <th scope="col">Model</th>
      <th scope="col">Score</th>
      <th scope="col">Performance Notes</th>
      <th scope="col">Rating</th>
    </tr>
  </thead>
  <tbody>
    <!-- Data will be inserted here by JavaScript -->
  </tbody>
</table>
</div>
<div class="legend">
<!-- Each swatch takes its gradient from the shared CSS custom properties via an inline style -->
<div class="legend-item">
<div class="legend-color" style="background: var(--gradient-success);"></div>
<span>Excellent (108+)</span>
</div>
<div class="legend-item">
<div class="legend-color" style="background: var(--gradient-info);"></div>
<span>Good (91-107)</span>
</div>
<div class="legend-item">
<div class="legend-color" style="background: var(--gradient-warning);"></div>
<span>Average (75-90)</span>
</div>
<div class="legend-item">
<div class="legend-color" style="background: var(--gradient-danger);"></div>
<span>Poor (&lt;75)</span>
</div>
</div>
</div>
<script>
// Leaderboard data, listed in rank order. Each entry has:
//   rank       - position shown in the Rank column (rendered as "#<rank>")
//   name       - model identifier shown in the Model column
//   score      - numeric score; shown with one decimal place, drives the progress bar
//                width and the rating badge
//   strengths  - free-text note shown in the Performance Notes column
//   weaknesses - free-text note shown in the Performance Notes column
const models = [
{ rank: 1, name: "granite-4.0-h-tiny", score: 103.5, strengths: "Extremely well-rounded; top-tier in logic, math, translation, and synonyms.", weaknesses: "Fails completely at rhyming; hallucinates facts in summarization tasks." },
{ rank: 2, name: "Qwen3-4B-Instruct", score: 102, strengths: "Top performer, excels in core NLP, logic, and factual recall.", weaknesses: "Prone to factual hallucinations in summarization tasks." },
{ rank: 3, name: "lfm2-8b", score: 99, strengths: "Very logical, provides detailed, nuanced answers, strong at misconception correction.", weaknesses: "Struggles with creative tasks like rhyming and procedural sequencing." },
{ rank: 4, name: "Qwen3-MOE-4x0.6B-2.4B-Writing-Thunder-V1.2.Q8_0.gguf", score: 96, strengths: "Strong in logic, math, grammar, and summarization.", weaknesses: "Struggles with rhyming, synonyms, some translation, and procedural sequencing." },
{ rank: 5, name: "granite-3.3-2b-instruct-Q8_0.gguf", score: 95, strengths: "Excels at core NLP, logic, math, and misconception correction.", weaknesses: "Fails completely at NER, rhyming, and procedural sequencing." },
{ rank: 6, name: "granite-3.1-3b-instruct", score: 93.5, strengths: "Highly capable when it works; excellent at summarization and logic.", weaknesses: "Unreliable; frequently outputs junk characters ('{') instead of answering." },
{ rank: 7, name: "lfm2-2.6b", score: 93.5, strengths: "Strong core capabilities, great at grammar and misconception correction.", weaknesses: "Significant weakness in analogy, rhyming, and sequencing tasks." },
{ rank: 8, name: "EXAONE-3.5-2.4B-Instruct-abliterated.Q8_0.gguf", score: 93, strengths: "Excellent at reasoning, summarization, grammar, and misconception correction.", weaknesses: "Fails completely at translation and sequencing; unreliable output formatting." },
{ rank: 9, name: "Qwen3-1.7B", score: 92.5, strengths: "Good overall performance on core tasks and math.", weaknesses: "Fails completely on rhyming and has some odd analogy mistakes." },
{ rank: 10, name: "Llama-3.2-1B-Instruct", score: 92, strengths: "Great at core NLP, math, and code generation.", weaknesses: "Fails badly on misconception correction, sequencing, and paraphrasing." },
{ rank: 11, name: "lfm2-1.2b", score: 90.5, strengths: "Strong core skills like grammar, math, and translation.", weaknesses: "Knowledge gaps (object location) and hallucinates facts in headlines." },
{ rank: 12, name: "Falcon-H1-1.5B-Deep-Instruct", score: 89, strengths: "Excellent summarizer and paraphraser, strong on synonyms.", weaknesses: "Very poor at logical deduction, rhyming, and categorization." },
{ rank: 13, name: "arco-3", score: 83, strengths: "Powerful 0.6b model; perfect at code gen, sentiment, math, and core knowledge.", weaknesses: "Fails completely at summarization (hallucinations), sequencing, and rhyming." },
{ rank: 14, name: "Falcon-H1-1.5B-Instruct", score: 81, strengths: "Good at logic, math, and factual questions.", weaknesses: "Fails translation completely and often gives blank/junk answers." },
{ rank: 15, name: "Llama-3.2-SUN-HDIC-1B-Instruct.Q8_0.gguf", score: 79, strengths: "Strong in synonyms, math, and factual recall; decent at core NLP.", weaknesses: "Complete failure at summarization and misconception correction; bad factual hallucinations." },
{ rank: 16, name: "Piaget-0.6B.Q8_0.gguf", score: 78, strengths: "Excellent at core knowledge tasks: Sentiment, Object Location, Antonyms, Categorization, Math, Factual QA.", weaknesses: "Complete failure at Summarization, Sequencing, and Rhyming. Very poor at Grammar and Misconception Correction." },
{ rank: 17, name: "lfm2-700m", score: 75.5, strengths: "Handles sentiment, math, and logic correctly.", weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar." },
{ rank: 18, name: "granite-4.0-350m-q8_0.gguf", score: 75.0, strengths: "Perfect translation and math; strong vocabulary (synonyms/antonyms) and common-sense knowledge.", weaknesses: "Complete failure on grammar correction; severe factual hallucinations in summarization; weak sequencing." },
{ rank: 19, name: "Qwen3-psychological-reasoning-0.6B.Q8_0.gguf", score: 73, strengths: "Excels at factual recall and classification (Sentiment, Object Location, Math, Factual QA, NER).", weaknesses: "Very poor at reasoning and creativity; complete failure in summarization, sequencing, and rhyming." },
{ rank: 20, name: "qwen2.5-0.5b-instruct", score: 72, strengths: "Decent at math, basic commands, and some logic.", weaknesses: "Fails creative tasks (rhyming, synonyms) and suffers major headline hallucinations." }
];
// Denominator for the "score / max" label and for the progress-bar width percentage.
const maxScore = 125;
// Returns the badge markup for a numeric score.
// Tiers are checked from the highest floor down; the first floor the score
// reaches decides the badge, and anything below every floor is rated "Poor".
function getRatingBadge(score) {
  const tiers = [
    [108, '<span class="badge badge--excellent">Excellent</span>'],
    [91, '<span class="badge badge--good">Good</span>'],
    [75, '<span class="badge badge--average">Average</span>'],
  ];
  for (const [floor, markup] of tiers) {
    if (score >= floor) {
      return markup;
    }
  }
  return '<span class="badge badge--poor">Poor</span>';
}
// Escapes the five HTML-significant characters so that data text can be
// interpolated into an innerHTML template without being parsed as markup.
function escapeHtml(value) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

// Renders the first 20 entries of `models` as rows of #performanceTable.
// Each row shows rank, name, "score / maxScore" with a proportional bar,
// the strengths/weaknesses notes, and the rating badge from getRatingBadge().
// Rows fade in one after another, each delayed by a further 0.05s.
function populateTable() {
  const tbody = document.querySelector('#performanceTable tbody');
  if (!tbody) {
    return; // table markup is missing, so there is nothing to render into
  }
  // Build all rows off-document and attach them in a single insertion.
  const fragment = document.createDocumentFragment();
  models.slice(0, 20).forEach((model, index) => {
    // Clamp so an out-of-range score cannot yield a negative or >100% bar width.
    const percentage = Math.min(100, Math.max(0, (model.score / maxScore) * 100));
    const row = document.createElement('tr');
    // Rows start transparent; the forwards-filled fadeIn animation leaves them visible.
    row.style.animation = `fadeIn 0.5s ease-out ${index * 0.05}s forwards`;
    row.style.opacity = 0;
    // \u2713 (check mark) and \u2717 (ballot X) are written as escapes so they
    // survive encoding round trips; the previous literals had been corrupted.
    // The bar is aria-hidden because the numeric score right above it already
    // conveys the same information as text.
    row.innerHTML = `
      <td class="rank">#${escapeHtml(model.rank)}</td>
      <td><div class="model-name">${escapeHtml(model.name)}</div></td>
      <td>
        <div class="score">${model.score.toFixed(1)} / ${maxScore}</div>
        <div class="progress-container" aria-hidden="true">
          <div class="progress-bar" style="width: ${percentage}%"></div>
        </div>
      </td>
      <td class="performance-notes">
        <div class="strengths">\u2713 ${escapeHtml(model.strengths)}</div>
        <div class="weaknesses">\u2717 ${escapeHtml(model.weaknesses)}</div>
      </td>
      <td>${getRatingBadge(model.score)}</td>
    `;
    fragment.appendChild(row);
  });
  tbody.appendChild(fragment);
}
document.addEventListener('DOMContentLoaded', populateTable);
</script>
</body>
</html>