Skip to content

Instantly share code, notes, and snippets.

@MattWenJun
Created March 15, 2026 08:52
Show Gist options
  • Select an option

  • Save MattWenJun/0bd6212d7ea82411a49fa046df91ca17 to your computer and use it in GitHub Desktop.

Select an option

Save MattWenJun/0bd6212d7ea82411a49fa046df91ca17 to your computer and use it in GitHub Desktop.
OpenClaw LLM Intel Report 2026.03
<!DOCTYPE html>
<html lang="zh-CN" data-theme="light">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenClaw LLM Intel Report // 2026.03</title>
<script src="https://cdn.tailwindcss.com"></script>
<!-- Google Fonts: warm up both origins. The stylesheet is served from fonts.googleapis.com,
     while the font files it references are fetched (in CORS mode, hence `crossorigin`)
     from fonts.gstatic.com. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700;800&family=Orbitron:wght@400;500;600;700;800;900&family=Noto+Sans+SC:wght@300;400;500;700;900&display=swap" rel="stylesheet">
<script>
  // Tailwind Play CDN configuration: extends the default theme with this
  // report's colour palette and font stacks.
  {
    // Custom colour tokens (near-black surface tones plus accent shades).
    const reportColors = {
      void: '#06080c',
      slate950: '#0a0f1a',
      slate900: '#0f172a',
      amber: { 400: '#fbbf24', 500: '#f59e0b', 600: '#d97706' },
      cyan: { 300: '#67e8f9', 400: '#22d3ee', 500: '#06b6d4' },
      rose: { 400: '#fb7185', 500: '#f43f5e' },
      emerald: { 400: '#34d399', 500: '#10b981' },
    };

    // Font stacks; the first family of each is loaded by the Google Fonts link in <head>.
    const reportFonts = {
      mono: ['JetBrains Mono', 'monospace'],
      display: ['Orbitron', 'sans-serif'],
      body: ['Noto Sans SC', 'sans-serif'],
    };

    tailwind.config = {
      theme: {
        extend: {
          colors: reportColors,
          fontFamily: reportFonts,
        },
      },
    };
  }
</script>
<style>
* { scrollbar-width: thin; scrollbar-color: #fbbf2433 transparent; }
::-webkit-scrollbar { width: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #fbbf2433; border-radius: 3px; }
body {
background: #06080c;
color: #e2e8f0;
font-family: 'Noto Sans SC', 'JetBrains Mono', sans-serif;
}
@keyframes scanline {
0% { transform: translateY(-100%); }
100% { transform: translateY(100vh); }
}
@keyframes fadeInUp {
from { opacity: 0; transform: translateY(30px); }
to { opacity: 1; transform: translateY(0); }
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
@keyframes glitch {
0%, 100% { clip-path: inset(0 0 0 0); }
20% { clip-path: inset(20% 0 60% 0); transform: translateX(-2px); }
40% { clip-path: inset(60% 0 10% 0); transform: translateX(2px); }
60% { clip-path: inset(40% 0 30% 0); transform: translateX(-1px); }
80% { clip-path: inset(10% 0 70% 0); transform: translateX(1px); }
}
@keyframes pulse-border {
0%, 100% { border-color: #fbbf2444; }
50% { border-color: #fbbf24aa; }
}
@keyframes countUp {
from { opacity: 0; transform: scale(0.5); }
to { opacity: 1; transform: scale(1); }
}
@keyframes barGrow {
from { width: 0; }
}
@keyframes slideInLeft {
from { opacity: 0; transform: translateX(-40px); }
to { opacity: 1; transform: translateX(0); }
}
@keyframes typewriter {
from { width: 0; }
to { width: 100%; }
}
/* Entrance animations: elements start fully transparent and keep the last
   keyframe (`forwards`), so they only become visible once the animation runs. */
.animate-in { animation: fadeInUp 0.6s ease-out forwards; opacity: 0; }
.animate-fade { animation: fadeIn 0.8s ease-out forwards; opacity: 0; }
.animate-slide { animation: slideInLeft 0.5s ease-out forwards; opacity: 0; }
/* Faint 2px highlight bar that sweeps down the viewport in an endless loop. */
.scanline-overlay::after {
  content: '';
  position: fixed;
  top: 0; left: 0; right: 0;
  height: 2px;
  background: linear-gradient(90deg, transparent, #fbbf2408, transparent);
  animation: scanline 4s linear infinite;
  pointer-events: none;
  z-index: 9999;
}
/* Honour the user's reduced-motion preference: show animated content
   immediately at full opacity and drop the perpetual scanline sweep.
   Must stay after the rules above so it wins at equal specificity. */
@media (prefers-reduced-motion: reduce) {
  .animate-in,
  .animate-fade,
  .animate-slide {
    animation: none;
    opacity: 1;
  }
  .scanline-overlay::after {
    animation: none;
    display: none;
  }
}
.grid-bg {
background-image:
linear-gradient(rgba(251,191,36,0.03) 1px, transparent 1px),
linear-gradient(90deg, rgba(251,191,36,0.03) 1px, transparent 1px);
background-size: 40px 40px;
}
.noise-bg {
position: relative;
}
.noise-bg::before {
content: '';
position: absolute;
inset: 0;
background: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.03'/%3E%3C/svg%3E");
pointer-events: none;
z-index: 0;
}
.card-glow {
border: 1px solid #fbbf2422;
background: linear-gradient(135deg, #0f172aee, #0a0f1aee);
backdrop-filter: blur(10px);
position: relative;
overflow: hidden;
}
.card-glow::before {
content: '';
position: absolute;
top: 0; left: 0; right: 0;
height: 1px;
background: linear-gradient(90deg, transparent, #fbbf2466, transparent);
}
.tag-classified {
font-family: 'Orbitron', sans-serif;
font-size: 0.6rem;
letter-spacing: 0.3em;
color: #f43f5e;
border: 1px solid #f43f5e44;
padding: 2px 8px;
text-transform: uppercase;
}
.heatmap-cell {
transition: all 0.3s ease;
cursor: pointer;
position: relative;
}
.heatmap-cell:hover {
transform: scale(1.15);
z-index: 10;
box-shadow: 0 0 20px rgba(251,191,36,0.3);
}
.heatmap-cell .tooltip {
display: none;
position: absolute;
bottom: 100%;
left: 50%;
transform: translateX(-50%);
background: #1e293b;
border: 1px solid #fbbf2444;
padding: 4px 8px;
font-size: 0.7rem;
white-space: nowrap;
z-index: 20;
border-radius: 4px;
}
.heatmap-cell:hover .tooltip { display: block; }
.timeline-dot {
width: 14px; height: 14px;
border-radius: 50%;
border: 2px solid #fbbf24;
background: #06080c;
position: relative;
z-index: 2;
}
.timeline-dot::after {
content: '';
position: absolute;
top: 50%; left: 50%;
width: 6px; height: 6px;
border-radius: 50%;
background: #fbbf24;
transform: translate(-50%, -50%);
}
.sort-header {
cursor: pointer;
user-select: none;
transition: color 0.2s;
}
.sort-header:hover { color: #fbbf24; }
.sort-header.active { color: #fbbf24; }
.sort-header::after {
content: ' ↕';
opacity: 0.3;
font-size: 0.7em;
}
.sort-header.asc::after { content: ' ↑'; opacity: 1; }
.sort-header.desc::after { content: ' ↓'; opacity: 1; }
.bar-chart-bar {
animation: barGrow 1s ease-out forwards;
transform-origin: left;
}
.tier-badge {
font-family: 'Orbitron', sans-serif;
font-weight: 700;
font-size: 0.65rem;
letter-spacing: 0.15em;
padding: 2px 10px;
border-radius: 2px;
}
.issue-card {
border-left: 3px solid #fbbf24;
transition: all 0.3s ease;
}
.issue-card:hover {
border-left-color: #22d3ee;
background: #1e293b44;
transform: translateX(4px);
}
.tab-btn {
transition: all 0.2s;
border-bottom: 2px solid transparent;
}
.tab-btn.active {
color: #fbbf24;
border-bottom-color: #fbbf24;
}
.tab-content { display: none; }
.tab-content.active { display: block; }
@media (max-width: 768px) {
.hero-title { font-size: 1.5rem !important; }
.stat-grid { grid-template-columns: 1fr 1fr !important; }
.heatmap-grid { font-size: 0.6rem; }
}
/* ========== THEME TOGGLE ========== */
.theme-toggle {
position: fixed;
top: 20px;
right: 20px;
z-index: 9999;
width: 42px;
height: 42px;
border-radius: 50%;
border: 1px solid #fbbf2444;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
font-size: 18px;
transition: all 0.3s ease;
backdrop-filter: blur(10px);
background: rgba(15,23,42,0.8);
color: #fbbf24;
}
.theme-toggle:hover {
transform: scale(1.1);
border-color: #fbbf24;
box-shadow: 0 0 20px rgba(251,191,36,0.2);
}
/* ========== LIGHT THEME ========== */
[data-theme="light"] {
color-scheme: light;
}
[data-theme="light"] body {
background: #f1f5f9;
color: #1e293b;
}
[data-theme="light"] * { scrollbar-color: #94a3b833 transparent; }
[data-theme="light"] ::-webkit-scrollbar-thumb { background: #94a3b855; }
[data-theme="light"] .theme-toggle {
background: rgba(255,255,255,0.9);
border-color: #cbd5e1;
color: #475569;
}
[data-theme="light"] .theme-toggle:hover {
border-color: #f59e0b;
color: #f59e0b;
box-shadow: 0 0 20px rgba(245,158,11,0.15);
}
/* Scanline & grid: suppress in light */
[data-theme="light"].scanline-overlay::after,
[data-theme="light"] .scanline-overlay::after { opacity: 0; }
[data-theme="light"] .grid-bg {
background-image:
linear-gradient(rgba(148,163,184,0.08) 1px, transparent 1px),
linear-gradient(90deg, rgba(148,163,184,0.08) 1px, transparent 1px);
}
[data-theme="light"] .noise-bg::before { opacity: 0; }
/* Cards */
[data-theme="light"] .card-glow {
border-color: #e2e8f0;
background: linear-gradient(135deg, #ffffffee, #f8fafcee);
box-shadow: 0 1px 3px rgba(0,0,0,0.06), 0 1px 2px rgba(0,0,0,0.04);
}
[data-theme="light"] .card-glow::before {
background: linear-gradient(90deg, transparent, #e2e8f0, transparent);
}
/* Typography overrides */
[data-theme="light"] .text-white { color: #0f172a !important; }
[data-theme="light"] .text-slate-300 { color: #334155 !important; }
[data-theme="light"] .text-slate-400 { color: #64748b !important; }
[data-theme="light"] .text-slate-500 { color: #64748b !important; }
[data-theme="light"] .text-slate-600 { color: #94a3b8 !important; }
[data-theme="light"] .text-slate-700 { color: #94a3b8 !important; }
/* Hero glow blobs */
[data-theme="light"] .bg-amber-500\/5 { background: rgba(251,191,36,0.08) !important; }
[data-theme="light"] .bg-cyan-500\/5 { background: rgba(6,182,212,0.06) !important; }
/* Amber accent adjustments for readability */
[data-theme="light"] .text-amber-400 { color: #d97706 !important; }
[data-theme="light"] .text-amber-500 { color: #b45309 !important; }
[data-theme="light"] .from-amber-400 { --tw-gradient-from: #d97706 !important; }
[data-theme="light"] .via-amber-300 { --tw-gradient-via: #f59e0b !important; }
[data-theme="light"] .to-cyan-400 { --tw-gradient-to: #0891b2 !important; }
[data-theme="light"] .text-cyan-400 { color: #0891b2 !important; }
[data-theme="light"] .text-emerald-400 { color: #059669 !important; }
[data-theme="light"] .text-rose-400 { color: #e11d48 !important; }
/* Tag */
[data-theme="light"] .tag-classified {
color: #e11d48;
border-color: #fda4af;
background: #fff1f2;
}
/* Table */
[data-theme="light"] .border-b.border-slate-800\/50 { border-color: #e2e8f0 !important; }
[data-theme="light"] .border-b.border-amber-500\/20 { border-color: #e2e8f0 !important; }
[data-theme="light"] tr:hover { background: rgba(241,245,249,0.8) !important; }
[data-theme="light"] .bg-slate-800 { background: #e2e8f0 !important; }
[data-theme="light"] .bg-slate-900\/80 { background: rgba(241,245,249,0.9) !important; }
[data-theme="light"] .bg-slate-800\/50 { background: rgba(226,232,240,0.5) !important; }
/* Tier badges */
[data-theme="light"] .bg-emerald-500\/20 { background: rgba(16,185,129,0.12) !important; }
[data-theme="light"] .bg-cyan-500\/20 { background: rgba(6,182,212,0.12) !important; }
[data-theme="light"] .bg-amber-500\/20 { background: rgba(245,158,11,0.12) !important; }
[data-theme="light"] .bg-rose-500\/20 { background: rgba(244,63,94,0.12) !important; }
[data-theme="light"] .bg-emerald-500\/10 { background: rgba(16,185,129,0.08) !important; }
[data-theme="light"] .bg-cyan-500\/10 { background: rgba(6,182,212,0.08) !important; }
[data-theme="light"] .bg-amber-500\/10 { background: rgba(245,158,11,0.08) !important; }
[data-theme="light"] .bg-rose-500\/10 { background: rgba(244,63,94,0.08) !important; }
/* Heatmap tooltip */
[data-theme="light"] .heatmap-cell .tooltip {
background: #ffffff;
border-color: #e2e8f0;
color: #1e293b;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}
[data-theme="light"] .heatmap-cell:hover {
box-shadow: 0 0 20px rgba(0,0,0,0.1);
}
/* Timeline dot (light theme): fill matches the light page background,
   ring and inner core switch to the darker amber accent. */
[data-theme="light"] .timeline-dot {
  background: #f1f5f9;
  border-color: #d97706;
}
[data-theme="light"] .timeline-dot::after {
  background: #d97706;
}
/* Issue card hover */
[data-theme="light"] .issue-card:hover {
background: rgba(241,245,249,0.8);
border-left-color: #0891b2;
}
/* Sort header */
[data-theme="light"] .sort-header:hover { color: #d97706; }
[data-theme="light"] .sort-header.active { color: #d97706; }
/* Tab */
[data-theme="light"] .tab-btn.active {
color: #d97706;
border-bottom-color: #d97706;
}
/* Nav bar */
[data-theme="light"] nav .bg-slate-900\/90 {
background: rgba(255,255,255,0.92) !important;
border-color: #e2e8f0 !important;
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}
[data-theme="light"] nav a {
color: #64748b !important;
}
[data-theme="light"] nav a:hover {
color: #d97706 !important;
background: rgba(245,158,11,0.08) !important;
}
/* Footer border */
[data-theme="light"] .border-t.border-amber-500\/10 { border-color: #e2e8f0 !important; }
[data-theme="light"] .border-amber-500\/10 { border-color: #e2e8f0 !important; }
/* Gradient line decorations */
[data-theme="light"] .bg-gradient-to-b.from-amber-500\/50 { --tw-gradient-from: rgba(217,119,6,0.4) !important; }
[data-theme="light"] .w-8.h-px.bg-amber-500 { background: #d97706 !important; }
/* Tier card left borders */
[data-theme="light"] .border-l-emerald-500 { border-left-color: #059669 !important; }
[data-theme="light"] .border-l-cyan-500 { border-left-color: #0891b2 !important; }
[data-theme="light"] .border-l-amber-500 { border-left-color: #d97706 !important; }
[data-theme="light"] .border-l-rose-500 { border-left-color: #e11d48 !important; }
/* Violet accents */
[data-theme="light"] .text-violet-300 { color: #7c3aed !important; }
[data-theme="light"] .text-violet-400 { color: #7c3aed !important; }
</style>
</head>
<body class="scanline-overlay grid-bg noise-bg min-h-screen">
<!-- Theme toggle: fixed-position icon button. The accessible name comes from aria-label,
     so both SVG icons are decorative and hidden from assistive technology.
     type="button" makes the in-page action explicit (the default type is "submit"). -->
<button type="button" class="theme-toggle" id="themeToggle" aria-label="Toggle theme" title="Toggle light/dark theme">
<svg id="themeIconSun" aria-hidden="true" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="display:none"><circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/></svg>
<svg id="themeIconMoon" aria-hidden="true" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"/></svg>
</button>
<!-- ============ HERO ============ -->
<header class="relative overflow-hidden" style="min-height: 100vh; display: flex; align-items: center;">
<div class="absolute top-0 left-0 w-full h-full">
<div class="absolute top-10 left-10 w-64 h-64 bg-amber-500/5 rounded-full blur-[100px]"></div>
<div class="absolute bottom-20 right-20 w-96 h-96 bg-cyan-500/5 rounded-full blur-[120px]"></div>
<div class="absolute top-1/3 right-1/4 w-px h-40 bg-gradient-to-b from-transparent via-amber-500/20 to-transparent"></div>
<div class="absolute top-1/2 left-1/3 w-40 h-px bg-gradient-to-r from-transparent via-amber-500/20 to-transparent"></div>
</div>
<div class="relative z-10 w-full max-w-7xl mx-auto px-6 py-20">
<div class="flex items-center gap-4 mb-8 animate-fade" style="animation-delay: 0.1s">
<span class="tag-classified">INTEL REPORT</span>
<span class="font-mono text-xs text-slate-500">REF: OC-LLM-2026-03-15 // CLEARANCE: PUBLIC</span>
</div>
<h1 class="hero-title font-display text-4xl md:text-6xl font-black text-transparent bg-clip-text bg-gradient-to-r from-amber-400 via-amber-300 to-cyan-400 mb-4 animate-in" style="animation-delay: 0.2s; line-height: 1.2;">
OpenClaw LLM<br>
<span class="text-3xl md:text-5xl">实力评估报告</span>
</h1>
<p class="font-mono text-sm md:text-base text-slate-400 max-w-2xl mb-12 animate-in" style="animation-delay: 0.4s">
<span class="text-cyan-400">$</span> 基于 GitHub Issues、Changelog 及社区反馈的真实表现分析<br>
<span class="text-slate-600">// Generated by Claude Opus 4.6 Agent Team</span>
</p>
<div class="stat-grid grid grid-cols-2 md:grid-cols-4 gap-4 max-w-4xl">
<div class="card-glow rounded-lg p-5 animate-in" style="animation-delay: 0.5s">
<div class="font-mono text-xs text-amber-500/60 uppercase tracking-wider mb-1">Issue 搜索覆盖</div>
<div class="font-display text-3xl font-bold text-amber-400" id="stat1">0</div>
<div class="font-mono text-xs text-slate-500 mt-1">模型相关 Issues (API)</div>
</div>
<div class="card-glow rounded-lg p-5 animate-in" style="animation-delay: 0.6s">
<div class="font-mono text-xs text-cyan-500/60 uppercase tracking-wider mb-1">模型覆盖</div>
<div class="font-display text-3xl font-bold text-cyan-400" id="stat2">0</div>
<div class="font-mono text-xs text-slate-500 mt-1">主流大模型</div>
</div>
<div class="card-glow rounded-lg p-5 animate-in" style="animation-delay: 0.7s">
<div class="font-mono text-xs text-emerald-500/60 uppercase tracking-wider mb-1">数据截止</div>
<div class="font-display text-lg font-bold text-emerald-400">v3.13</div>
<div class="font-mono text-xs text-slate-500 mt-1">v2026.3.13</div>
</div>
<div class="card-glow rounded-lg p-5 animate-in" style="animation-delay: 0.8s">
<div class="font-mono text-xs text-rose-500/60 uppercase tracking-wider mb-1">项目规模</div>
<div class="font-display text-3xl font-bold text-rose-400" id="stat3">0</div>
<div class="font-mono text-xs text-slate-500 mt-1">GitHub Stars</div>
</div>
</div>
<div class="absolute bottom-10 left-1/2 -translate-x-1/2 animate-bounce">
<div class="w-px h-12 bg-gradient-to-b from-amber-500/50 to-transparent"></div>
</div>
</div>
</header>
<!-- ============ METHODOLOGY ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="methodology">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Methodology</h2>
<span class="tag-classified">SEC.01</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-10">评估方法论</h3>
<div class="grid md:grid-cols-2 gap-6">
<!-- Data Sources -->
<div class="card-glow rounded-lg p-6">
<div class="font-mono text-xs text-cyan-400 mb-4 uppercase tracking-wider">// 数据来源(全部可溯源)</div>
<ul class="space-y-3 text-sm">
<li class="flex items-start gap-3">
<span class="text-amber-400 mt-0.5"></span>
<div><span class="text-white font-medium">GitHub Issues</span><span class="text-slate-400"> — openclaw/openclaw (313k stars),GitHub Search API 覆盖 ~6,000 条模型相关 Issue,20 条通过 API 逐条验证原文</span></div>
</li>
<li class="flex items-start gap-3">
<span class="text-amber-400 mt-0.5"></span>
<div><span class="text-white font-medium">Changelog / Release Notes</span><span class="text-slate-400"> — v2026.2.22 至 v2026.3.13 共 8 个版本,3 个版本通过 gh release view 获取完整原文</span></div>
</li>
<li class="flex items-start gap-3">
<span class="text-amber-400 mt-0.5"></span>
<div><span class="text-white font-medium">SWE-bench Verified(双源交叉验证)</span><span class="text-slate-400"> — llm-stats.com + marc0.dev 交叉验证;GPT-5.4 使用前代参考值,Seed2.0 为单源</span></div>
</li>
<li class="flex items-start gap-3">
<span class="text-amber-400 mt-0.5"></span>
<div><span class="text-white font-medium">OpenRouter 使用统计</span><span class="text-slate-400"> — Token 消费排名 (Dataconomy 2026.2.24)、编程查询份额 (State of AI)、中国模型 61% 市场份额</span></div>
</li>
<li class="flex items-start gap-3">
<span class="text-amber-400 mt-0.5"></span>
<div><span class="text-white font-medium">创始人公开偏好</span><span class="text-slate-400"> — Peter Steinberger 推荐 Claude Opus/Codex (YC 专访/Lex Fridman Podcast #491;36kr 为编译转载)</span></div>
</li>
</ul>
</div>
<!-- Scoring Formula -->
<div class="card-glow rounded-lg p-6">
<div class="font-mono text-xs text-cyan-400 mb-4 uppercase tracking-wider">// 评分公式(v6.0 透明化)</div>
<div class="bg-slate-900/80 rounded p-4 mb-4 font-mono text-sm">
<span class="text-amber-400">综合评分</span> = <span class="text-cyan-300">SWE-bench</span> <span class="text-slate-500">x</span> <span class="text-white">30%</span>
+ <span class="text-emerald-300">集成实战</span> <span class="text-slate-500">x</span> <span class="text-white">50%</span>
+ <span class="text-violet-300">综合价值</span> <span class="text-slate-500">x</span> <span class="text-white">20%</span>
</div>
<div class="space-y-3 text-sm">
<div>
<div class="flex justify-between mb-1">
<span class="text-cyan-300 font-medium">SWE-bench 编码能力 (30%)</span>
<span class="font-mono text-xs text-slate-500">双源交叉验证</span>
</div>
<div class="h-1.5 bg-slate-800 rounded-full"><div class="h-full bg-gradient-to-r from-cyan-500 to-cyan-400 rounded-full" style="width: 30%"></div></div>
</div>
<div>
<div class="flex justify-between mb-1">
<span class="text-emerald-300 font-medium">OpenClaw 集成实战 (50%)</span>
<span class="font-mono text-xs text-slate-500">Issue→Fix 因果链</span>
</div>
<div class="h-1.5 bg-slate-800 rounded-full"><div class="h-full bg-gradient-to-r from-emerald-500 to-emerald-400 rounded-full" style="width: 50%"></div></div>
</div>
<div>
<div class="flex justify-between mb-1">
<span class="text-violet-300 font-medium">综合价值 (20%)</span>
<span class="font-mono text-xs text-slate-500">成本 + 市场 + 开源</span>
</div>
<div class="h-1.5 bg-slate-800 rounded-full"><div class="h-full bg-gradient-to-r from-violet-500 to-violet-400 rounded-full" style="width: 20%"></div></div>
</div>
</div>
<div class="mt-4 p-3 bg-slate-800/50 rounded text-xs text-slate-400 font-mono leading-relaxed">
<span class="text-amber-400">验算示例:</span><br>
Claude Opus: 80.8x0.3 + 96x0.5 + 62x0.2 = 24.2+48.0+12.4 = <span class="text-white">85</span><br>
DeepSeek-V3.2: 73.0x0.3 + 80x0.5 + 92x0.2 = 21.9+40.0+18.4 = <span class="text-white">80</span><br>
Gemini 3.1 Pro: 80.6x0.3 + 45x0.5 + 52x0.2 = 24.2+22.5+10.4 = <span class="text-white">57</span><br>
<span class="text-slate-500">// 集成权重 50% 体现报告定位:OpenClaw 生态实战表现</span>
</div>
</div>
</div>
<!-- F1 Fix: Scoring Rubrics -->
<div class="grid md:grid-cols-2 gap-6 mt-6">
<!-- Integration Rubric -->
<div class="card-glow rounded-lg p-6">
<div class="font-mono text-xs text-emerald-400 mb-4 uppercase tracking-wider">// 集成实战评分标准 (Rubric)</div>
<div class="space-y-2 text-xs">
<div class="flex items-start gap-2"><span class="text-emerald-400 font-mono shrink-0">90-100</span><span class="text-slate-400">Issue&#8594;Fix 链完整,关键 Issue 全部解决,无回退,响应 &#8804;2 天,Release Notes 有专项修复</span></div>
<div class="flex items-start gap-2"><span class="text-emerald-400 font-mono shrink-0">75-89 </span><span class="text-slate-400">Issue&#8594;Fix 链基本完整,多数 Issue 已解决,仅剩轻微/UI 级残留</span></div>
<div class="flex items-start gap-2"><span class="text-cyan-400 font-mono shrink-0">55-74 </span><span class="text-slate-400">部分修复 &#8212; 根因已处理但下游仍 Open,或特定运行时环境可用</span></div>
<div class="flex items-start gap-2"><span class="text-amber-400 font-mono shrink-0">35-54 </span><span class="text-slate-400">修复不稳定(反复回退)或关键 Issue(死循环等)仍 Open</span></div>
<div class="flex items-start gap-2"><span class="text-rose-400 font-mono shrink-0">15-34 </span><span class="text-slate-400">问题持续升级,Changelog 无专项修复,基础设施级故障</span></div>
<div class="flex items-start gap-2"><span class="text-rose-400 font-mono shrink-0"> 0-14 </span><span class="text-slate-400">无数据或生态完全缺位,无法构建 Issue&#8594;Fix 链</span></div>
</div>
<div class="mt-3 p-2 bg-slate-800/50 rounded text-[10px] text-slate-500 font-mono leading-relaxed">
Claude Opus(96): 链完整+1天响应+0 Critical Open &#8594; 90-100 区间<br>
DeepSeek(80): 3/4 Issue Closed+仅 UI 残留 &#8594; 75-89 区间<br>
MiniMax(60): 根因修复+3 下游 Open &#8594; 55-74 区间<br>
Kimi(50): 多次修复后反复回退 &#8594; 35-54 区间<br>
Gemini(45): 死循环 #41620 仍 Open+12 Bug &#8594; 35-54 区间<br>
GLM-5(22): 0 专项修复+v3.12 致回退 &#8594; 15-34 区间<br>
Seed(10): 66 条间接提及+无专项 Bug &#8594; 0-14 区间<br>
<span class="text-amber-400">GPT-5.4(82)/Qwen(62): MANIFEST 无专项因果链,基于 Timeline 数据定性评估</span>
</div>
</div>
<!-- Value Rubric -->
<div class="card-glow rounded-lg p-6">
<div class="font-mono text-xs text-violet-400 mb-4 uppercase tracking-wider">// 综合价值评分标准 (Rubric)</div>
<div class="space-y-2 text-xs">
<div class="flex items-start gap-2"><span class="text-emerald-400 font-mono shrink-0">85-100</span><span class="text-slate-400">MIT/Apache 开源 + 成本优势 &gt;10x + OpenRouter 高使用量</span></div>
<div class="flex items-start gap-2"><span class="text-emerald-400 font-mono shrink-0">70-84 </span><span class="text-slate-400">开源+中等成本优势,或闭源+Arena ELO 验证+高性价比</span></div>
<div class="flex items-start gap-2"><span class="text-cyan-400 font-mono shrink-0">50-69 </span><span class="text-slate-400">闭源+中等定价,或开源但基础设施问题抵消价值</span></div>
<div class="flex items-start gap-2"><span class="text-amber-400 font-mono shrink-0">30-49 </span><span class="text-slate-400">闭源+高定价,质量溢价部分抵消成本</span></div>
<div class="flex items-start gap-2"><span class="text-rose-400 font-mono shrink-0"> 0-29 </span><span class="text-slate-400">闭源+无定价数据+生态缺位,或数据不足无法评估</span></div>
</div>
<div class="mt-3 p-2 bg-slate-800/50 rounded text-[10px] text-slate-500 font-mono leading-relaxed">
DeepSeek(92): MIT+68-90x 成本优势+累计 token 第一 &#8594; 85-100<br>
MiniMax(85): MIT+低成本+周 token 第一 &#8594; 85-100<br>
Qwen(82): Apache 2.0+397B MoE &#8594; 70-84<br>
Kimi(78): 开源+1T MoE+ELO~1370 &#8594; 70-84<br>
Sonnet(75): 闭源但 Claude 家族最优性价比+ELO 1435 &#8594; 70-84<br>
Opus(62): 闭源+最高定价($15/$25)+ELO 1510 质量溢价 &#8594; 50-69<br>
GPT-5.4(55): 闭源+发布 10 天+无独立验证 &#8594; 50-69<br>
Gemini(52): 闭源+Preview 定价不明 &#8594; 50-69<br>
GLM-5(48): MIT 但基础设施"unusable" &#8594; 30-49<br>
Seed(20): 闭源+无定价+无生态 &#8594; 0-29
</div>
</div>
</div>
<!-- F5 Fix: Model coverage note -->
<div class="mt-6 p-3 card-glow rounded text-xs text-slate-500 font-mono">
<span class="text-amber-400/60">模型覆盖说明:</span>本报告覆盖 10 个在 OpenClaw 生态中有显著 Issue 记录或市场份额的模型。Meta Llama(OpenRouter 累计 token 第三)和 Mistral 因在 OpenClaw Issues 中主要作为本地推理后端出现、非独立 Provider 集成而未纳入排名。
</div>
</section>
<!-- ============ RANKINGS TABLE ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="rankings">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Rankings</h2>
<span class="tag-classified">SEC.02</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">综合实力排名</h3>
<p class="text-sm text-slate-400 mb-8 font-mono">// 点击表头排序 · 综合评分 = SWE-bench x 30% + 集成实战 x 50% + 综合价值 x 20%<br><span class="text-rose-400/60">&#9888; 集成得分衡量 OpenClaw 实战可用性(含平台适配因素),非纯模型能力评分 · 本报告由 Claude 生成,详见三重偏差声明</span></p>
<div class="overflow-x-auto">
<!-- Rankings table. The body (#rankBody) is empty in the markup; presumably filled by the page script.
     A visually hidden caption names the table for screen readers, and every header cell
     declares scope="col" so data cells are associated with their column.
     NOTE(review): .sort-header cells are styled as clickable but are not focusable here;
     keyboard access depends on the sorting script — confirm. -->
<table class="w-full text-sm" id="rankTable">
  <caption class="sr-only">综合实力排名</caption>
  <thead>
    <tr class="border-b border-amber-500/20">
      <th scope="col" class="sort-header text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" data-col="rank" data-type="num">#</th>
      <th scope="col" class="text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider">模型</th>
      <th scope="col" class="text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider hidden md:table-cell">产地</th>
      <th scope="col" class="sort-header text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" data-col="score" data-type="num">综合评分</th>
      <th scope="col" class="sort-header text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" data-col="swe" data-type="num">SWE-bench</th>
      <th scope="col" class="sort-header text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" data-col="integration" data-type="num">集成得分</th>
      <th scope="col" class="text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" title="定性评估,基于 Issue 修复稳定度">稳定性*</th>
      <th scope="col" class="text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider" title="定性评估,基于成本/开源/市场数据">性价比*</th>
      <th scope="col" class="text-left py-3 px-3 font-mono text-xs text-slate-400 uppercase tracking-wider hidden md:table-cell">评价</th>
    </tr>
  </thead>
  <tbody id="rankBody"></tbody>
</table>
<p class="text-[10px] text-slate-600 font-mono mt-3">* 稳定性/性价比为基于 Rubric 区间的定性评估。GPT-5.4 和 Qwen 3.5 在 DATA-MANIFEST 中无专项 Issue&#8594;Fix 因果链,集成分基于 Timeline 数据定性评估,溯源严格度低于其他模型。</p>
</div>
</section>
<!-- ============ HEATMAP ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="heatmap">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Issue Heatmap</h2>
<span class="tag-classified">SEC.03</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">GitHub Issues 问题热力图</h3>
<p class="text-sm text-slate-400 mb-8 font-mono">// 各模型在不同问题类别的 Bug 密度分布 · 悬停查看详情<br><span class="text-slate-600">* 热力图数值为基于 Issue 内容的定性归类,非精确统计;DATA-MANIFEST 中无逐格溯源</span></p>
<div class="card-glow rounded-lg p-6 overflow-x-auto">
<div class="heatmap-grid min-w-[700px]" id="heatmapGrid"></div>
<div class="flex items-center gap-4 mt-6 text-xs font-mono text-slate-500">
<span>严重程度:</span>
<div class="flex items-center gap-1"><div class="w-4 h-4 rounded" style="background: #065f46"></div><span>0</span></div>
<div class="flex items-center gap-1"><div class="w-4 h-4 rounded" style="background: #059669"></div><span>1</span></div>
<div class="flex items-center gap-1"><div class="w-4 h-4 rounded" style="background: #eab308"></div><span>2</span></div>
<div class="flex items-center gap-1"><div class="w-4 h-4 rounded" style="background: #ea580c"></div><span>3</span></div>
<div class="flex items-center gap-1"><div class="w-4 h-4 rounded" style="background: #dc2626"></div><span>4+</span></div>
<span class="ml-4 text-slate-600">| Claude = Sonnet+Opus 合并 · Seed2.0 因数据不足(66 条 Issue)未纳入</span>
</div>
</div>
</section>
<!-- ============ CHANGELOG TIMELINE ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="timeline">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Changelog</h2>
<span class="tag-classified">SEC.04</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">修复追踪时间线</h3>
<p class="text-sm text-slate-400 mb-10 font-mono">// 2026.2.22 — 2026.3.13 共 8 个稳定版 · Release Notes 通过 gh release view 验证</p>
<div class="relative max-w-3xl mx-auto">
<div class="absolute left-6 top-0 bottom-0 w-px bg-gradient-to-b from-amber-500/50 via-amber-500/20 to-transparent"></div>
<div class="space-y-8" id="timelineContainer"></div>
</div>
</section>
<!-- ============ GAP ANALYSIS (was Hype Index) ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="gap">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Gap Analysis</h2>
<span class="tag-classified">SEC.05</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">能力落差分析:Benchmark vs 实战</h3>
<p class="text-sm text-slate-400 mb-10 font-mono">// SWE-bench 编码能力 vs OpenClaw 集成得分 · 两组数据均可溯源验证<br><span class="text-cyan-400/60">正值 = 集成超额交付 · 负值 = Benchmark 能力未转化为实战表现</span></p>
<div class="card-glow rounded-lg p-6 md:p-8">
<div class="space-y-6" id="gapChart"></div>
<div class="mt-10 pt-6 border-t border-amber-500/10">
<div class="flex items-start gap-3">
<span class="text-amber-400 text-lg mt-0.5">&#9889;</span>
<div>
<p class="text-white font-medium mb-2">核心发现</p>
<p class="text-sm text-slate-400 leading-relaxed">
<span class="text-emerald-400 font-medium">Claude 和 DeepSeek 集成表现超过 Benchmark 预期</span>
— Claude 受益于平台优先适配(创始人推荐 + 社区主力),DeepSeek Issue→Fix 链完整且成本极低。
<span class="text-amber-400 font-medium">Gemini 和 GLM-5 存在最大能力落差</span>
— SWE-bench 分别为 80.6% 和 77.8%,但 OpenClaw 集成得分仅 45 和 22。
<span class="text-rose-400">Benchmark 高分不等于生产可用</span>,集成质量、修复响应速度和平台适配决定实战表现。
<br><span class="text-slate-500 text-xs">注:Claude 集成优势部分归因于平台优先适配(三重偏差),读者应参考 Arena 盲评交叉验证。</span>
</p>
</div>
</div>
</div>
</div>
</section>
<!-- ============ KEY FINDINGS ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="findings">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Key Findings</h2>
<span class="tag-classified">SEC.06</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">关键发现 & Issue 引用</h3>
<p class="text-sm text-slate-400 mb-10 font-mono">// 基于 GitHub Issues 和社区反馈的一手证据</p>
<div class="grid md:grid-cols-2 gap-4" id="findingsGrid"></div>
</section>
<!-- ============ CONCLUSION ============ -->
<section class="max-w-7xl mx-auto px-6 py-20" id="conclusion">
<div class="flex items-center gap-3 mb-8">
<div class="w-8 h-px bg-amber-500"></div>
<h2 class="font-display text-xs tracking-[0.3em] text-amber-500 uppercase">Conclusion</h2>
<span class="tag-classified">SEC.07</span>
</div>
<h3 class="font-body text-2xl md:text-3xl font-bold text-white mb-4">最终评估结论</h3>
<p class="text-sm text-slate-400 mb-10 font-mono">// 综合评分 = SWE-bench x 30% + 集成实战 x 50% + 综合价值 x 20% · 详见 DATA-MANIFEST.md</p>
<!-- Tier Cards -->
<div class="space-y-6 mb-16">
<!-- Tier 1 -->
<div class="card-glow rounded-lg p-6 border-l-4 border-l-emerald-500">
<div class="flex items-center gap-3 mb-4">
<span class="tier-badge bg-emerald-500/20 text-emerald-400 border border-emerald-500/30">TIER 1</span>
<span class="font-body text-white font-bold">生产级可靠(综合 ≥78)</span>
</div>
<div class="flex flex-wrap gap-3">
<span class="px-3 py-1.5 bg-emerald-500/10 border border-emerald-500/20 rounded text-emerald-300 text-sm font-medium">Claude Sonnet 4.6 <span class="text-emerald-500/60 text-xs">86</span></span>
<span class="px-3 py-1.5 bg-emerald-500/10 border border-emerald-500/20 rounded text-emerald-300 text-sm font-medium">Claude Opus 4.6 <span class="text-emerald-500/60 text-xs">85</span></span>
<span class="px-3 py-1.5 bg-emerald-500/10 border border-emerald-500/20 rounded text-emerald-300 text-sm font-medium">DeepSeek-V3.2 <span class="text-emerald-500/60 text-xs">80</span></span>
</div>
<p class="text-sm text-slate-400 mt-3">Sonnet 集成质量与 Opus 持平(94 vs 96)但综合价值更高,公式计算领先 1 分。DeepSeek Issue→Fix 链完整(#31850→fix, #42173→fix),68-90x 成本优势使综合价值维度得分 92,弥补 SWE-bench 较低(73.0%)的短板。三个模型共同特征:Issue→Fix 因果链完整,无 Critical 级未解决问题。</p>
</div>
<!-- Tier 2 -->
<div class="card-glow rounded-lg p-6 border-l-4 border-l-cyan-500">
<div class="flex items-center gap-3 mb-4">
<span class="tier-badge bg-cyan-500/20 text-cyan-400 border border-cyan-500/30">TIER 2</span>
<span class="font-body text-white font-bold">有竞争力但集成有坑(综合 62-77)</span>
</div>
<div class="flex flex-wrap gap-3">
<span class="px-3 py-1.5 bg-cyan-500/10 border border-cyan-500/20 rounded text-cyan-300 text-sm font-medium">GPT-5.4 <span class="text-cyan-500/60 text-xs">76</span></span>
<span class="px-3 py-1.5 bg-cyan-500/10 border border-cyan-500/20 rounded text-cyan-300 text-sm font-medium">MiniMax M2.5 <span class="text-cyan-500/60 text-xs">71</span></span>
<span class="px-3 py-1.5 bg-cyan-500/10 border border-cyan-500/20 rounded text-cyan-300 text-sm font-medium">Qwen 3.5 <span class="text-cyan-500/60 text-xs">70</span></span>
<span class="px-3 py-1.5 bg-cyan-500/10 border border-cyan-500/20 rounded text-cyan-300 text-sm font-medium">Kimi K2.5 <span class="text-cyan-500/60 text-xs">64</span></span>
</div>
<p class="text-sm text-slate-400 mt-3">GPT-5.4 发布仅 10 天(2026.3.5),SWE-bench 80.0% 为前代 GPT-5.2 参考值,集成默认测试模型但独立验证不足。MiniMax SWE-bench 80.2%(T1 级)但 3 个集成 Issue 仍 Open(#33133/#34487/#45882)。Qwen llama.cpp/vLLM 下可靠,Ollama 有模板 Bug。Kimi 原生 tool calling 优秀但 OpenClaw 适配反复回退(commit 909f26a 格式转换错误)。</p>
</div>
<!-- Tier 3 -->
<div class="card-glow rounded-lg p-6 border-l-4 border-l-amber-500">
<div class="flex items-center gap-3 mb-4">
<span class="tier-badge bg-amber-500/20 text-amber-400 border border-amber-500/30">TIER 3</span>
<span class="font-body text-white font-bold">能力-集成严重落差(综合 40-61)</span>
</div>
<div class="flex flex-wrap gap-3">
<span class="px-3 py-1.5 bg-amber-500/10 border border-amber-500/20 rounded text-amber-300 text-sm font-medium">Gemini 3.1 Pro <span class="text-amber-500/60 text-xs">57</span></span>
<span class="px-3 py-1.5 bg-amber-500/10 border border-amber-500/20 rounded text-amber-300 text-sm font-medium">GLM-5 <span class="text-amber-500/60 text-xs">44</span></span>
</div>
<p class="text-sm text-slate-400 mt-3">Gemini SWE-bench 80.6%(全场第二)但 OpenClaw 集成得分仅 45:死循环(#41620)、12+ 协议层 Bug、model prefix 未剥离(#41398)。<span class="text-amber-400">Benchmark 能力与实战表现落差最具代表性。</span>GLM-5 SWE-bench 77.8% 但集成得分仅 22:限流致"Nearly Unusable"、500 错误、65K token 故障、疑似泄露其他用户 token。</p>
</div>
<!-- Tier 4 -->
<div class="card-glow rounded-lg p-6 border-l-4 border-l-rose-500">
<div class="flex items-center gap-3 mb-4">
<span class="tier-badge bg-rose-500/20 text-rose-400 border border-rose-500/30">TIER 4</span>
<span class="font-body text-white font-bold">数据不足(综合 &lt;40)</span>
</div>
<div class="flex flex-wrap gap-3">
<span class="px-3 py-1.5 bg-rose-500/10 border border-rose-500/20 rounded text-rose-300 text-sm font-medium">Seed2.0 <span class="text-rose-500/60 text-xs">32</span></span>
</div>
<p class="text-sm text-slate-400 mt-3">SWE-bench 76.5%(单源 llm-stats.com)模型能力不弱,但 OpenClaw 生态仅 66 条相关 Issue(GitHub Search API),Changelog 无专项修复记录。集成得分 10 源于数据缺失而非模型缺陷,<span class="text-amber-400">排名反映的是生态参与度而非模型质量。</span></p>
</div>
</div>
<!-- Core Insights -->
<div class="card-glow rounded-lg p-8">
<div class="font-mono text-xs text-amber-500/60 uppercase tracking-wider mb-6">// 核心观点</div>
<div class="grid md:grid-cols-2 gap-6">
<div class="flex items-start gap-3">
<span class="text-amber-400 text-xl">01</span>
<p class="text-sm text-slate-300 leading-relaxed"><span class="text-white font-medium">Benchmark 高分 ≠ 生产可用</span> — Gemini SWE-bench 80.6%(全场第二)但集成得分仅 45,综合排名第八。GLM-5 SWE-bench 77.8% 但集成得分 22。<span class="text-cyan-400">集成质量和修复响应速度才是 Agent 平台的核心指标。</span></p>
</div>
<div class="flex items-start gap-3">
<span class="text-amber-400 text-xl">02</span>
<p class="text-sm text-slate-300 leading-relaxed"><span class="text-white font-medium">Sonnet &gt; Opus: 公式不偏袒旗舰</span> — 两者集成质量几乎相同(94 vs 96),SWE-bench 差距仅 1.2 个百分点,但 Sonnet 综合价值维度 75 vs Opus 的 62。<span class="text-emerald-400">透明公式的结果不预设结论。</span></p>
</div>
<div class="flex items-start gap-3">
<span class="text-amber-400 text-xl">03</span>
<p class="text-sm text-slate-300 leading-relaxed"><span class="text-white font-medium">DeepSeek 实战表现超越 Benchmark 预期</span> — SWE-bench 73.0%(最低之一)但 Issue→Fix 链完整,68-90x 成本优势(getaiperks/costgoat 多源验证),综合评分 80 进入 T1。<span class="text-emerald-400">低成本 + 高集成质量 = 隐性竞争力。</span></p>
</div>
<div class="flex items-start gap-3">
<span class="text-amber-400 text-xl">04</span>
<p class="text-sm text-slate-300 leading-relaxed"><span class="text-rose-400 font-medium">三重偏差必须披露</span> — 本报告由 Claude 生成(偏差 1)+ OpenClaw 创始人推荐 Claude(偏差 2)+ 平台优先适配 Claude(偏差 3)。Claude 的高集成分可能部分归因于平台优化而非纯粹模型优势。<span class="text-amber-400">建议读者参考 Chatbot Arena 盲评和 BFCL 独立排行交叉验证。</span></p>
</div>
</div>
</div>
</section>
<!-- ============ FOOTER ============ -->
<footer class="max-w-7xl mx-auto px-6 py-16 border-t border-amber-500/10">
<div class="flex flex-col md:flex-row justify-between items-start md:items-center gap-6">
<div>
<div class="font-display text-sm text-amber-500/60 mb-2">OPENCLAW LLM INTEL</div>
<p class="text-xs text-slate-500 font-mono max-w-lg">
数据来源:github.com/openclaw/openclaw (Issues/Releases via gh API)、SWE-bench Verified (llm-stats.com + marc0.dev)、
OpenRouter 使用统计 (Dataconomy/State of AI)、Arena ELO (MangoMind)、YC 专访/Lex Fridman Podcast #491(36kr 为编译转载)、pricepertoken.com/costgoat.com 定价。
完整数据溯源见 DATA-MANIFEST.md。
</p>
<div class="mt-3 p-3 border border-amber-500/10 rounded text-[10px] text-slate-600 font-mono max-w-lg leading-relaxed">
<span class="text-amber-500/60">偏差与局限声明 (v6.0)</span><br>
· 综合评分 = SWE-bench x 30% + 集成实战 x 50% + 综合价值 x 20%,公式和维度得分完全公开可验算<br>
· 集成权重 50% 使排名偏向 OpenClaw 生态适配良好的模型,Benchmark 纯能力排名会显著不同<br>
· 三重偏差:本报告由 Claude 生成 + 创始人推荐 Claude + 平台优先适配 Claude<br>
· SWE-bench: 双源交叉验证;GPT-5.4 为前代参考值,Seed2.0 为单源 76.5%<br>
· Arena ELO: MangoMind 2026.2 数据,仅 Claude Opus/Sonnet/Kimi 有可用数据<br>
· BFCL v4 MiniMax 76.8: 厂商自报,未被 BFCL 官方独立确认<br>
· GitHub Issues: Search API ~6,000 条,20 条逐条验证;Issue 数量与模型质量不直接对应<br>
· 6 个未覆盖维度:多模态、上下文窗口、推理延迟、系统化成本、中文专项、安全分级<br>
· 本报告为截止 v2026.3.13 的有限快照
</div>
</div>
<div class="text-right">
<div class="font-mono text-xs text-slate-500">报告日期:2026-03-15</div>
<div class="font-mono text-xs text-slate-600 mt-1">Powered by Claude Opus 4.6 x Agent Team</div>
<div class="font-mono text-[10px] text-slate-700 mt-1">REF: OC-LLM-2026-03-15 // v6.0 (透明公式 · 双源验证 · 三重偏差披露)</div>
<div class="font-mono text-[10px] text-slate-700 mt-1">数据截止:OpenClaw v2026.3.13</div>
</div>
</div>
</footer>
<!-- ============ NAVIGATION ============ -->
<!-- Floating in-page section navigation (hidden below the md breakpoint).
     aria-label gives the landmark an accessible name so assistive technology
     can tell it apart from any other navigation region on the page. -->
<nav class="fixed bottom-6 left-1/2 -translate-x-1/2 z-50 hidden md:block" aria-label="报告章节导航">
  <div class="flex items-center gap-1 bg-slate-900/90 backdrop-blur-xl border border-amber-500/20 rounded-full px-2 py-1.5 shadow-2xl">
    <a href="#methodology" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">方法论</a>
    <a href="#rankings" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">排名</a>
    <a href="#heatmap" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">热力图</a>
    <a href="#timeline" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">时间线</a>
    <a href="#gap" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">能力落差</a>
    <a href="#findings" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">发现</a>
    <a href="#conclusion" class="px-3 py-1.5 text-xs font-mono text-slate-400 hover:text-amber-400 transition-colors rounded-full hover:bg-amber-500/10">结论</a>
  </div>
</nav>
<script>
// ==================== DATA ====================
// Ranking table rows, one object per model, listed in rank order.
// Fields read by renderRankings: tier (t1..t4 badge), originFlag, name, origin,
// score (bar width and colour), swe (printed with a trailing %), integration,
// stability and costValue (pre-rendered star strings), comment.
// rank, valueScore and cn are not read by renderRankings; the sort handler
// reads whichever property name a clicked header's data-col specifies.
// Each score equals round(swe * 0.3 + integration * 0.5 + valueScore * 0.2),
// i.e. the weighting stated in the Conclusion section of the page.
const rankingsData = [
{ rank:1, name:'Claude Sonnet 4.6', origin:'Anthropic · 闭源', originFlag:'\u{1F1FA}\u{1F1F8}', score:86, swe:79.6, integration:94, valueScore:75, stability:'\u2605\u2605\u2605\u2605\u2605', costValue:'\u2605\u2605\u2605\u2605\u2605', comment:'集成 94 + SWE 79.6% + 性价比之王', tier:'t1', cn:false },
{ rank:2, name:'Claude Opus 4.6', origin:'Anthropic · 闭源', originFlag:'\u{1F1FA}\u{1F1F8}', score:85, swe:80.8, integration:96, valueScore:62, stability:'\u2605\u2605\u2605\u2605\u2605', costValue:'\u2605\u2605\u2605\u2606\u2606', comment:'SWE-bench 最高 + ELO 1510 + 价格最高', tier:'t1', cn:false },
{ rank:3, name:'DeepSeek-V3.2', origin:'深度求索 · MIT · 671B MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:80, swe:73.0, integration:80, valueScore:92, stability:'\u2605\u2605\u2605\u2605\u2606', costValue:'\u2605\u2605\u2605\u2605\u2605', comment:'Fix 链完整 + 68-90x 成本优势', tier:'t1', cn:true },
{ rank:4, name:'GPT-5.4', origin:'OpenAI · 闭源 · 2026.3.5', originFlag:'\u{1F1FA}\u{1F1F8}', score:76, swe:80.0, integration:82, valueScore:55, stability:'\u2605\u2605\u2605\u2605\u2606', costValue:'\u2605\u2605\u2605\u2606\u2606', comment:'SWE 为 GPT-5.2 参考值 · 发布仅 10 天', tier:'t2', cn:false },
{ rank:5, name:'MiniMax M2.5', origin:'MiniMax · MIT · 228B MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:71, swe:80.2, integration:60, valueScore:85, stability:'\u2605\u2605\u2605\u2606\u2606', costValue:'\u2605\u2605\u2605\u2605\u2605', comment:'SWE T1 级但 3 个集成 Issue 仍 Open', tier:'t2', cn:true },
{ rank:6, name:'Qwen 3.5', origin:'阿里 · Apache 2.0 · 397B MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:70, swe:76.4, integration:62, valueScore:82, stability:'\u2605\u2605\u2605\u2606\u2606', costValue:'\u2605\u2605\u2605\u2605\u2606', comment:'llama.cpp/vLLM 可靠 · Ollama 模板 Bug', tier:'t2', cn:true },
{ rank:7, name:'Kimi K2.5', origin:'月之暗面 · 开源 · 1T MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:64, swe:76.8, integration:50, valueScore:78, stability:'\u2605\u2605\u2606\u2606\u2606', costValue:'\u2605\u2605\u2605\u2605\u2606', comment:'原生能力优秀 · 平台适配问题(非模型缺陷)致集成低分', tier:'t2', cn:true },
{ rank:8, name:'Gemini 3.1 Pro', origin:'DeepMind · 闭源 · Preview', originFlag:'\u{1F1FA}\u{1F1F8}', score:57, swe:80.6, integration:45, valueScore:52, stability:'\u2605\u2605\u2606\u2606\u2606', costValue:'\u2605\u2605\u2605\u2606\u2606', comment:'SWE 第二但 12+ Bug + 死循环', tier:'t3', cn:false },
{ rank:9, name:'GLM-5', origin:'智谱AI · MIT · 744B MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:44, swe:77.8, integration:22, valueScore:48, stability:'\u2605\u2606\u2606\u2606\u2606', costValue:'\u2605\u2605\u2606\u2606\u2606', comment:'SWE 77.8% vs 集成 22 · 最大落差', tier:'t3', cn:true },
{ rank:10, name:'Seed2.0', origin:'字节跳动 · 闭源 · MoE', originFlag:'\u{1F1E8}\u{1F1F3}', score:32, swe:76.5, integration:10, valueScore:20, stability:'\u2014', costValue:'\u2014', comment:'SWE 76.5%(单源) · 生态数据不足', tier:'t4', cn:true },
];
// Issue-severity matrix consumed by renderHeatmap.
// values[row][col] is the level for categories[row] x models[col]; every row
// therefore carries one entry per model (8 here) and there is one row per
// category (6 here). renderHeatmap clamps a level to 4 when choosing the cell
// colour and tooltip label, but prints and sums the raw number.
const heatmapData = {
categories: ['Provider 集成', 'API 兼容性', 'Tool Calling', 'Token/格式', '性能/超时', '平台 vs 模型归因'],
models: ['Claude', 'GPT', 'Gemini', 'DeepSeek', 'Qwen', 'GLM', 'Kimi', 'MiniMax'],
values: [
[3,2,5,3,2,2,3,2],
[1,1,2,2,2,3,2,2],
[0,0,2,2,1,2,2,0],
[1,0,2,3,1,3,1,1],
[0,0,3,2,1,2,2,1],
[0,0,1,1,1,2,3,3],
]
};
// Release timeline consumed by renderTimeline, newest release first.
// Each entry: version (printed with a leading "v"), date (display string),
// severity ('critical' | 'high' | 'medium', the keys of renderTimeline's
// colour map) and items. Each item: text (description), issues (reference
// strings joined with spaces after the text) and models (one chip per name).
const timelineData = [
{ version:'2026.3.13', date:'3月14日', severity:'high', items:[
{ text:'修复 Anthropic ALIASES 启动崩溃(#45069/#44781 → PR #45520,Issue 到修复 1 天)', issues:['#45069','#44781'], models:['Claude'] },
{ text:'GPT-5.4 设为默认测试模型(PR #44367),3月5日发布尚无 SWE-bench/Arena 数据', issues:['PR#44367'], models:['GPT'] },
{ text:'Kimi 兼容性修复:Anthropic Messages 格式、Moonshot CN baseUrl、Ollama cloud 适配', issues:['#38669','#33637','#41519'], models:['Kimi'] },
]},
{ version:'2026.3.12', date:'3月12日', severity:'high', items:[
{ text:'Kimi tool_use 回退修复 — Release Notes 明确引用 #38669,#39907,#40552', issues:['#40552','#39907'], models:['Kimi'] },
{ text:'Gemini model-id 归一化(PR #42435)', issues:['#42435'], models:['Gemini'] },
{ text:'Codex server_error failover 分类缺失', issues:['#45281'], models:['GPT'] },
{ text:'GLM-5 图像识别在此版本后回退 (#46273)', issues:['#46273'], models:['GLM'] },
]},
{ version:'2026.3.10', date:'3月10日', severity:'critical', items:[
{ text:'Kimi Tool Calling 退化 — 根因为 OpenClaw commit 909f26a 的格式转换错误(非模型问题)', issues:['#40552','#39882','#39907'], models:['Kimi'] },
{ text:'Kimi Provider 硬编码 anthropicToolSchemaMode: "openai-functions" 强制错误格式', issues:['#41852'], models:['Kimi'] },
{ text:'修复后仍反复回退:v2026.3.8 和 v2026.3.11 均再次触发', issues:['#41297','#44549','#42481'], models:['Kimi'] },
]},
{ version:'2026.3.8', date:'3月8日', severity:'high', items:[
{ text:'Gemini LLM 请求绕过 systemd 代理', issues:['#42090'], models:['Gemini'] },
{ text:'Google model prefix 未剥离导致 404', issues:['#41398'], models:['Gemini'] },
{ text:'GLM-5/DeepSeek 控制 token 泄露到用户界面', issues:['#42173'], models:['GLM','DeepSeek'] },
{ text:'Azure OpenAI store:false 回归', issues:['#42800'], models:['GPT'] },
{ text:'Gemini 2.5 Pro 思维模型 compaction 死循环', issues:['#41620'], models:['Gemini'] },
]},
{ version:'2026.3.5', date:'3月5日', severity:'high', items:[
{ text:'MiniMax buildMinimaxProvider() 默认 API 类型错误 — 应为 anthropic-messages 而非 openai-completions', issues:['#15275'], models:['MiniMax'] },
{ text:'DeepSeek 控制 token 全角泄露暴露生产打磨不足', issues:['#42173'], models:['DeepSeek'] },
]},
{ version:'2026.3.2', date:'3月2日', severity:'medium', items:[
{ text:'Qwen3.5 + llama.cpp 流式解析错误', issues:['#32916'], models:['Qwen'] },
{ text:'MiniMax API 请求全部超时', issues:['#34487'], models:['MiniMax'] },
{ text:'非 Claude 模型返回空白响应', issues:['#17598'], models:['MiniMax','Kimi'] },
]},
{ version:'2026.3.1', date:'3月1日', severity:'medium', items:[
{ text:'DeepSeek-V3.2 模型名解析为 anthropic/deepseek-chat 失败', issues:['#31850'], models:['DeepSeek'] },
{ text:'子代理模型解析回归,仅 deepseek-chat 可用', issues:['#38305'], models:['多模型'] },
{ text:'developer 角色不被非 OpenAI 后端识别', issues:['#27037'], models:['Qwen','GLM','DeepSeek','Kimi'] },
]},
{ version:'2026.2.22', date:'2月22日', severity:'critical', items:[
{ text:'Anthropic 模型全线崩溃(API URL v1 问题)', issues:['#24709'], models:['Claude'] },
{ text:'Google 模型全线 rate_limit cooldown', issues:['#24839'], models:['Gemini'] },
{ text:'GLM-5 via OpenRouter failover 不触发', issues:['#26970'], models:['GLM'] },
]},
];
// Rows for the benchmark-vs-practice chart drawn by renderGapChart.
// benchmark and practice carry the same numbers as the swe and integration
// fields of rankingsData; both are used directly as bar widths in percent.
// renderGapChart derives the badge text from practice - benchmark, while the
// badge and bar colour come from the color field given here.
const gapData = [
{ name:'Claude Opus 4.6', benchmark:80.8, practice:96, color:'#10b981' },
{ name:'Claude Sonnet 4.6', benchmark:79.6, practice:94, color:'#10b981' },
{ name:'DeepSeek-V3.2', benchmark:73.0, practice:80, color:'#10b981' },
{ name:'GPT-5.4', benchmark:80.0, practice:82, color:'#10b981' },
{ name:'Qwen 3.5', benchmark:76.4, practice:62, color:'#eab308' },
{ name:'MiniMax M2.5', benchmark:80.2, practice:60, color:'#eab308' },
{ name:'Kimi K2.5', benchmark:76.8, practice:50, color:'#ea580c' },
{ name:'Gemini 3.1 Pro', benchmark:80.6, practice:45, color:'#ea580c' },
{ name:'GLM-5', benchmark:77.8, practice:22, color:'#dc2626' },
{ name:'Seed2.0', benchmark:76.5, practice:10, color:'#dc2626' },
];
// Cards for the Key Findings grid drawn by renderFindings.
// Each card: title, issue (reference text shown in the cyan pill), severity
// ('positive' | 'critical' | 'high' | 'medium'; selects the icon, the label and
// the left border colour), desc (body text) and models (one chip per name).
// All fields are interpolated into innerHTML without escaping, so they must
// remain trusted, author-written strings.
const findingsData = [
{ title:'评分公式透明化:综合评分 = SWE x 30% + 集成 x 50% + 价值 x 20%', issue:'方法论 v6.0', severity:'positive', desc:'v6.0 将不透明的主观评分替换为可验算的加权公式。集成权重 50% 体现报告定位(OpenClaw 生态实战),意味着 Benchmark 排名第二的 Gemini(SWE 80.6%)因集成得分仅 45 而综合排名第八。读者可使用不同权重自行重算。', models:['全部'] },
{ title:'Sonnet > Opus:透明公式不预设结论', issue:'公式验算', severity:'positive', desc:'Sonnet 集成得分 94(vs Opus 96),SWE-bench 仅低 1.2%,但综合价值维度 75 vs 62。公式计算 Sonnet 86 > Opus 85。这一反直觉结果恰恰证明公式不是为"把最贵的模型放第一"而设计,而是按证据计算。', models:['Claude'] },
{ title:'DeepSeek 升入 T1:集成质量被低估', issue:'#31850 #42173 #46975', severity:'positive', desc:'Issue→Fix 链完整:#31850(模型名解析)→已修复、#42173(控制 token 泄露)→已修复,当前仅剩轻微 UI 问题(#46975)。成本优势 68-90x(getaiperks $1.10 vs $75 = 68x,costgoat $0.28 vs $25 = 89x)。SWE-bench 73.0% 虽低但集成+价值弥补。', models:['DeepSeek'] },
{ title:'Gemini 降至 T3:Benchmark 最强不等于实战最强', issue:'#41620 #41398 #42090', severity:'critical', desc:'SWE-bench 80.6%(全场第二),但 OpenClaw 集成问题严重:#41620 死循环仍 Open、model prefix 未剥离致 404(#41398)、绕过 systemd 代理(#42090),12+ 协议层 Bug。集成得分 45,落差 35.6 分为全场最大之一。', models:['Gemini'] },
{ title:'MiniMax SWE-bench T1 级但集成仍有 3 个 Open Issue', issue:'#15275 #33133 #34487 #45882', severity:'high', desc:'根因 #15275(buildMinimaxProvider API 类型错误)已修复,但下游 #33133(Anthropic Messages 不投递)、#34487(API 超时)、#45882(流式输出失败)仍 Open。BFCL multi-turn 76.8 为厂商自报(FelloAI 标注未被 BFCL 官方独立确认)。SWE-bench 80.2% 经双源验证。', models:['MiniMax'] },
{ title:'Kimi 原生能力优秀但 OpenClaw 适配反复回退', issue:'#40552 #41852 #39882', severity:'high', desc:'curl 直测证明 Kimi API 原生 tool calling 正常(stop_reason: "tool_use")。问题出在 OpenClaw commit 909f26a 格式转换错误 + Provider 硬编码。SWE-bench 76.8%、ELO ~1370。v2026.3.8/3.11/3.12 修复后仍回退,集成得分 50。', models:['Kimi'] },
{ title:'GLM-5 能力-集成落差最严重:SWE 77.8% vs 集成 22', issue:'#46273 #42173 Reddit', severity:'high', desc:'SWE-bench 77.8% 模型能力不弱,但基础设施问题最深:限流致"Nearly Unusable"、500 错误频发、65K token 后故障、疑似泄露其他用户 token。v2026.3.12 后图像识别回退(#46273)。Changelog 无 GLM 专项修复,集成得分 22。', models:['GLM'] },
{ title:'三重偏差披露:Claude 生成 + 创始人推荐 + 平台适配', issue:'透明度声明', severity:'medium', desc:'偏差 1: 本报告由 Claude Opus 4.6 生成。偏差 2: OpenClaw 创始人 Peter Steinberger 公开推荐 Claude(36kr/Fortune 专访)。偏差 3: 平台优先测试/适配 Claude,使 Claude Bug 更少可能部分归因于平台优化。建议交叉参考 Arena 盲评(Claude Opus ELO 1510、Sonnet 1435)。', models:['Claude'] },
{ title:'OpenRouter 数据:使用量 ≠ 质量', issue:'Dataconomy 2026.2.24', severity:'medium', desc:'中国模型占 token 消费 61%:MiniMax 周 2.45T(+197%)、Kimi 1.21T(-20%)、GLM-5 780B(+158%)。但 Claude 占编程查询 60%+。GLM-5 使用量激增与"Nearly Unusable"评价存在张力 — 可能在简单场景可用但 Agent 场景不稳定。', models:['全部'] },
];
// ==================== RENDER FUNCTIONS ====================
/**
 * Animate a numeric counter from 0 up to `target` with a cubic ease-out.
 *
 * @param {?{textContent: string}} el  Element whose textContent shows the
 *   counter. A missing element is ignored, so one absent stat cannot throw.
 * @param {number} target    Final value of the counter.
 * @param {string} [suffix]  Text appended to the number. The special value
 *   'k+' with a target of at least 1000 prints whole thousands ("313k+").
 * @param {number} [duration] Animation length in milliseconds.
 */
function animateCounter(el, target, suffix = '', duration = 2000) {
  if (!el) return;
  const start = performance.now();
  // Only the 'k+' suffix switches to the abbreviated thousands form.
  const abbreviate = suffix === 'k+' && target >= 1000;
  function update(now) {
    const elapsed = now - start;
    // Clamp to [0, 1]: a non-positive duration jumps straight to the end and
    // a frame timestamp earlier than `start` cannot yield a negative value.
    const progress = duration > 0 ? Math.min(Math.max(elapsed / duration, 0), 1) : 1;
    const ease = 1 - Math.pow(1 - progress, 3);
    const current = Math.floor(target * ease);
    // The suffix is always appended. Previously any target >= 1000 silently
    // lost it, so a call with (6000, '+') rendered "6,000" instead of "6,000+".
    el.textContent = abbreviate
      ? Math.floor(current / 1000) + 'k+'
      : current.toLocaleString() + suffix;
    if (progress < 1) requestAnimationFrame(update);
  }
  requestAnimationFrame(update);
}
/**
 * Fill the #rankBody table body with one row per ranking record.
 *
 * @param {Array<Object>} data  Ranking records in display order (see
 *   rankingsData for the fields). The row index only drives the staggered
 *   animation delays.
 */
function renderRankings(data) {
  const tbody = document.getElementById('rankBody');
  const tierColors = { t1:'emerald', t2:'cyan', t3:'amber', t4:'rose' };
  const tierLabels = { t1:'T1', t2:'T2', t3:'T3', t4:'T4' };
  // Returns choices[k] for the first cut the value reaches, else the last choice.
  const pick = (value, cuts, choices) => {
    for (let k = 0; k < cuts.length; k++) {
      if (value >= cuts[k]) return choices[k];
    }
    return choices[cuts.length];
  };
  const scoreCuts = [78, 62, 40];
  const integrationCuts = [80, 60, 40];
  const deepShades = ['#10b981', '#22d3ee', '#eab308', '#f43f5e'];
  const lightShades = ['#34d399', '#67e8f9', '#fbbf24', '#fb7185'];
  const rows = data.map((r, i) => {
    const hue = tierColors[r.tier];
    const barFrom = pick(r.score, scoreCuts, deepShades);
    // The lighter shade doubles as gradient end and as the score text colour.
    const accent = pick(r.score, scoreCuts, lightShades);
    const integrationColor = pick(r.integration, integrationCuts, lightShades);
    return `
<tr class="border-b border-slate-800/50 hover:bg-slate-800/30 transition-colors" style="animation: fadeInUp 0.4s ease-out ${i*0.05}s forwards; opacity:0">
<td class="py-3 px-3">
<span class="tier-badge bg-${hue}-500/20 text-${hue}-400 border border-${hue}-500/30">${tierLabels[r.tier]}</span>
</td>
<td class="py-3 px-3">
<div class="flex items-center gap-2">
<span class="text-lg">${r.originFlag}</span>
<span class="font-medium text-white">${r.name}</span>
</div>
</td>
<td class="py-3 px-3 text-slate-400 text-xs font-mono hidden md:table-cell">${r.origin}</td>
<td class="py-3 px-3">
<div class="flex items-center gap-2">
<div class="w-16 h-1.5 bg-slate-800 rounded-full overflow-hidden">
<div class="h-full rounded-full bar-chart-bar" style="width:${r.score}%; background: linear-gradient(90deg, ${barFrom}, ${accent}); animation-delay:${i*0.1}s"></div>
</div>
<span class="font-mono text-sm font-bold" style="color:${accent}">${r.score}</span>
</div>
</td>
<td class="py-3 px-3 font-mono text-sm text-slate-300">${r.swe}%</td>
<td class="py-3 px-3 font-mono text-sm" style="color:${integrationColor}">${r.integration}</td>
<td class="py-3 px-3 text-sm tracking-wider">${r.stability}</td>
<td class="py-3 px-3 text-sm tracking-wider">${r.costValue}</td>
<td class="py-3 px-3 text-xs text-slate-500 italic hidden md:table-cell">"${r.comment}"</td>
</tr>
`;
  });
  tbody.innerHTML = rows.join('');
}
// Current sort state of the rankings table: the rankingsData property being
// sorted on and the direction ('asc' | 'desc').
let currentSort = { col: 'rank', dir: 'asc' };
// Clicking a .sort-header cell sorts by the property named in its data-col.
// A second click on the same header flips the direction; a different header
// starts ascending again. The active header gets 'active' plus the direction
// as CSS classes.
document.querySelectorAll('.sort-header').forEach(th => {
  th.addEventListener('click', () => {
    const col = th.dataset.col;
    if (currentSort.col === col) {
      currentSort.dir = currentSort.dir === 'asc' ? 'desc' : 'asc';
    } else {
      currentSort = { col, dir: 'asc' };
    }
    document.querySelectorAll('.sort-header').forEach(h => h.classList.remove('active','asc','desc'));
    th.classList.add('active', currentSort.dir);
    const sign = currentSort.dir === 'desc' ? -1 : 1;
    // Sort a copy so rankingsData keeps its original rank order.
    const sorted = [...rankingsData].sort((a, b) => {
      const va = a[col];
      const vb = b[col];
      // Numeric columns keep plain subtraction. Text columns (name, origin,
      // star strings, ...) are compared as strings, because subtracting them
      // yields NaN and leaves the order undefined.
      const cmp = (typeof va === 'number' && typeof vb === 'number')
        ? va - vb
        : String(va ?? '').localeCompare(String(vb ?? ''), 'zh-CN', { numeric: true });
      return sign * cmp;
    });
    renderRankings(sorted);
  });
});
/**
 * Draw the issue heatmap into #heatmapGrid from heatmapData.
 *
 * Layout: a label column followed by one column per model; one row per
 * category; then a totals row holding each model's column sum. The column
 * count follows heatmapData.models.length, so adding or removing a model no
 * longer breaks the grid or the animation stagger.
 */
function renderHeatmap() {
  const grid = document.getElementById('heatmapGrid');
  const colors = ['#065f46','#059669','#eab308','#ea580c','#dc2626'];
  const labels = ['无问题','轻微','中等','较多','严重'];
  const modelCount = heatmapData.models.length;
  let html = `<div style="display:grid; grid-template-columns: 120px repeat(${modelCount}, 1fr); gap: 3px;">`;
  // Empty corner cell above the category labels.
  html += '<div class="text-xs font-mono text-slate-500 p-2"></div>';
  heatmapData.models.forEach(m => {
    html += `<div class="text-xs font-mono text-center p-2 text-slate-400 font-medium">${m}</div>`;
  });
  heatmapData.categories.forEach((cat, ri) => {
    html += `<div class="text-xs font-mono text-slate-400 p-2 flex items-center">${cat}</div>`;
    heatmapData.values[ri].forEach((val, ci) => {
      // Levels above 4 reuse the top colour and label; the raw value is still shown.
      const level = Math.min(val, 4);
      const bg = colors[level];
      const label = labels[level];
      // Cells fade in one after another in row-major order.
      html += `<div class="heatmap-cell rounded p-3 text-center font-mono text-xs font-bold" style="background:${bg}; color:${val<=1?'#d1fae5':'#fff'}; animation: fadeIn 0.3s ease-out ${(ri*modelCount+ci)*0.03}s forwards; opacity:0">
${val}
<div class="tooltip font-mono">${heatmapData.models[ci]} \u00b7 ${cat}: ${label} (${val})</div>
</div>`;
    });
  });
  html += '<div class="text-xs font-mono text-amber-400 p-2 flex items-center font-bold">总计</div>';
  for (let ci = 0; ci < modelCount; ci++) {
    let total = 0;
    heatmapData.values.forEach(row => total += row[ci]);
    const bg = total <= 6 ? '#065f46' : total <= 10 ? '#eab308' : total <= 14 ? '#ea580c' : '#dc2626';
    html += `<div class="heatmap-cell rounded p-3 text-center font-mono text-xs font-bold" style="background:${bg}; color:#fff">${total}<div class="tooltip font-mono">${heatmapData.models[ci]} 总问题数: ${total}</div></div>`;
  }
  html += '</div>';
  grid.innerHTML = html;
}
/**
 * Render every timelineData entry as a card inside #timelineContainer.
 * Each card shows the version, date and a severity pill, followed by a
 * bullet list of items with their issue references and model chips.
 */
function renderTimeline() {
  const container = document.getElementById('timelineContainer');
  const sevColors = { critical:'#dc2626', high:'#ea580c', medium:'#eab308' };
  // One grey chip per affected model.
  const modelChip = (m) => `<span class="inline-block px-1.5 py-0.5 bg-slate-800 rounded text-[10px] font-mono text-slate-400 ml-0.5">${m}</span>`;
  // One bullet: description, space-joined issue references, model chips.
  const itemMarkup = (item) => {
    const refs = item.issues.join(' ');
    const chips = item.models.map(modelChip).join('');
    return `
<li class="flex items-start gap-2 text-sm">
<span class="text-amber-400 mt-0.5 shrink-0">\u25b8</span>
<div>
<span class="text-slate-300">${item.text}</span>
<span class="font-mono text-xs text-cyan-400/60 ml-1">${refs}</span>
<span class="ml-2">${chips}</span>
</div>
</li>
`;
  };
  const cards = timelineData.map((entry, i) => {
    // The severity colour tints the dot border and the pill.
    const tone = sevColors[entry.severity];
    const bullets = entry.items.map(itemMarkup).join('');
    return `
<div class="relative pl-16 animate-slide" style="animation-delay: ${i*0.15}s">
<div class="absolute left-0 top-0">
<div class="timeline-dot" style="border-color: ${tone}"></div>
</div>
<div class="card-glow rounded-lg p-5">
<div class="flex items-center gap-3 mb-3">
<span class="font-display text-sm font-bold text-white">v${entry.version}</span>
<span class="font-mono text-xs text-slate-500">${entry.date}</span>
<span class="px-2 py-0.5 rounded text-[10px] font-mono uppercase tracking-wider" style="background:${tone}22; color:${tone}; border: 1px solid ${tone}44">${entry.severity}</span>
</div>
<ul class="space-y-2">
${bullets}
</ul>
</div>
</div>
`;
  });
  container.innerHTML = cards.join('');
}
/**
 * Render the benchmark-vs-practice comparison into #gapChart.
 * Each gapData row becomes a name, a gap badge and two horizontal bars whose
 * widths are the benchmark and practice values in percent.
 */
function renderGapChart() {
  const container = document.getElementById('gapChart');
  // Badge text for a signed gap (practice minus benchmark), one decimal place.
  // Bands: above 0, down to -15, down to -30, and anything lower.
  const describeGap = (gap) => {
    const fixed = gap.toFixed(1);
    if (gap > 0) return `+${fixed} \u8d85\u989d\u4ea4\u4ed8`;
    if (gap > -15) return `${fixed} \u8f7b\u5fae\u843d\u5dee`;
    if (gap > -30) return `${fixed} \u663e\u8457\u843d\u5dee`;
    return `${fixed} \u4e25\u91cd\u843d\u5dee`;
  };
  const rows = [];
  gapData.forEach((d, i) => {
    const gapLabel = describeGap(d.practice - d.benchmark);
    rows.push(`
<div class="animate-slide" style="animation-delay: ${i*0.1}s">
<div class="flex items-center justify-between mb-2">
<span class="font-medium text-white text-sm">${d.name}</span>
<span class="font-mono text-xs px-2 py-0.5 rounded" style="background:${d.color}22; color:${d.color}; border:1px solid ${d.color}44">
${gapLabel}
</span>
</div>
<div class="flex gap-1 items-center mb-1">
<span class="font-mono text-[10px] text-slate-500 w-20 shrink-0">SWE-bench</span>
<div class="h-4 rounded bar-chart-bar" style="width: ${d.benchmark}%; background: ${d.color}55; animation-delay: ${i*0.15}s"></div>
<span class="font-mono text-[10px] text-slate-400 ml-1">${d.benchmark}%</span>
</div>
<div class="flex gap-1 items-center">
<span class="font-mono text-[10px] text-slate-500 w-20 shrink-0">\u96c6\u6210\u5b9e\u6218</span>
<div class="h-4 rounded bar-chart-bar" style="width: ${d.practice}%; background: ${d.color}; animation-delay: ${i*0.15+0.2}s"></div>
<span class="font-mono text-[10px] text-slate-400 ml-1">${d.practice}</span>
</div>
</div>`);
  });
  container.innerHTML = rows.join('');
}
/**
 * Render the findingsData cards into #findingsGrid.
 * A card shows a severity icon and title, the issue reference pill with the
 * severity label, the description, and one chip per affected model.
 */
function renderFindings() {
  const grid = document.getElementById('findingsGrid');
  const sevIcons = { critical:'\u{1F534}', high:'\u{1F7E0}', medium:'\u{1F7E1}', positive:'\u{1F7E2}' };
  const sevLabels = { critical:'\u4e25\u91cd', high:'\u9ad8\u5371', medium:'\u4e2d\u7b49', positive:'\u6b63\u9762' };
  // Left border colour; every severity other than the three named ones
  // shares the yellow used for 'medium'.
  const borderColor = (severity) => {
    switch (severity) {
      case 'positive': return '#10b981';
      case 'critical': return '#dc2626';
      case 'high': return '#ea580c';
      default: return '#eab308';
    }
  };
  const cards = [];
  findingsData.forEach((f, i) => {
    const chips = f.models
      .map(m => `<span class="px-1.5 py-0.5 bg-slate-800 rounded text-[10px] font-mono text-slate-400">${m}</span>`)
      .join('');
    cards.push(`
<div class="issue-card card-glow rounded-lg p-5 animate-in" style="animation-delay: ${i*0.1}s; border-left-color: ${borderColor(f.severity)}">
<div class="flex items-center gap-2 mb-3">
<span>${sevIcons[f.severity]}</span>
<span class="font-medium text-white text-sm">${f.title}</span>
</div>
<div class="flex items-center gap-2 mb-3">
<span class="font-mono text-xs text-cyan-400 bg-cyan-500/10 px-2 py-0.5 rounded">${f.issue}</span>
<span class="font-mono text-[10px] text-slate-500">${sevLabels[f.severity]}</span>
</div>
<p class="text-xs text-slate-400 leading-relaxed mb-3">${f.desc}</p>
<div class="flex flex-wrap gap-1">
${chips}
</div>
</div>
`);
  });
  grid.innerHTML = cards.join('');
}
// ==================== THEME ====================
/**
 * Switch the page to the given theme.
 * Writes the name to the data-theme attribute of <html> and shows exactly one
 * of the two toggle icons: the sun while the theme is 'dark', the moon for
 * any other value.
 *
 * @param {string} theme  Theme name; this file only ever passes 'dark' or 'light'.
 */
function applyTheme(theme) {
  document.documentElement.setAttribute('data-theme', theme);
  const sunIcon = document.getElementById('themeIconSun');
  const moonIcon = document.getElementById('themeIconMoon');
  const darkActive = theme === 'dark';
  sunIcon.style.display = darkActive ? 'block' : 'none';
  moonIcon.style.display = darkActive ? 'none' : 'block';
}
// Restore the theme saved by a previous visit; fall back to 'light'.
(function initTheme() {
  let saved = null;
  try {
    // Reading localStorage can throw (e.g. a SecurityError when storage is
    // blocked). Left uncaught, that would abort this whole inline script
    // before the DOMContentLoaded renderer below is registered.
    saved = localStorage.getItem('oc-report-theme');
  } catch (err) {
    saved = null;
  }
  // Accept only the two values the toggle writes; anything else means 'light'.
  applyTheme(saved === 'dark' || saved === 'light' ? saved : 'light');
})();
// Theme toggle button: flip between 'dark' and 'light' and remember the choice.
// Optional chaining keeps a missing button from aborting the rest of the script.
document.getElementById('themeToggle')?.addEventListener('click', () => {
  const current = document.documentElement.getAttribute('data-theme');
  const next = current === 'dark' ? 'light' : 'dark';
  // Apply first, so the switch still happens when persisting fails.
  applyTheme(next);
  try {
    localStorage.setItem('oc-report-theme', next);
  } catch (err) {
    // Storage blocked or full: the theme simply is not remembered.
  }
});
// ==================== INIT ====================
// Page bootstrap: start the stat counters, render every data-driven section,
// start scroll-triggered animations and wire smooth scrolling for in-page links.
document.addEventListener('DOMContentLoaded', () => {
  // Staggered counters; a stat element that is missing is skipped.
  const startCounter = (id, target, suffix, duration, delay) => {
    const el = document.getElementById(id);
    if (el) setTimeout(() => animateCounter(el, target, suffix, duration), delay);
  };
  startCounter('stat1', 6000, '+', 2000, 800);
  startCounter('stat2', 10, '+', 1500, 1000);
  startCounter('stat3', 313000, 'k+', 2500, 1200);
  renderRankings(rankingsData);
  renderHeatmap();
  renderTimeline();
  renderGapChart();
  renderFindings();
  // Set an element's animation running the first time at least 10% of it is
  // visible, then stop watching it since nothing more needs to happen.
  const observer = new IntersectionObserver((entries) => {
    entries.forEach(entry => {
      if (entry.isIntersecting) {
        entry.target.style.animationPlayState = 'running';
        observer.unobserve(entry.target);
      }
    });
  }, { threshold: 0.1 });
  // Runs after the render calls so the elements they inject are observed too.
  document.querySelectorAll('.animate-in, .animate-slide, .animate-fade').forEach(el => {
    observer.observe(el);
  });
  // Honour the user's reduced-motion preference for programmatic scrolling.
  const reduceMotion = window.matchMedia('(prefers-reduced-motion: reduce)').matches;
  document.querySelectorAll('a[href^="#"]').forEach(a => {
    a.addEventListener('click', e => {
      const id = a.getAttribute('href').slice(1);
      const target = id ? document.getElementById(id) : null;
      // With no matching section, keep the browser default instead of
      // cancelling the click and leaving a dead link. Looking up by id also
      // avoids the selector syntax error a bare "#" href would raise.
      if (!target) return;
      e.preventDefault();
      target.scrollIntoView({ behavior: reduceMotion ? 'auto' : 'smooth', block: 'start' });
    });
  });
});
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment