-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
157 lines (143 loc) · 8.23 KB
/
index.html
File metadata and controls
157 lines (143 loc) · 8.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Encoding declaration first so the parser sees it before any text content. -->
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Lynchmark – LLM Benchmark</title>
  <link rel="icon" href="/assets/favicon.webp" type="image/webp">

  <!-- Search-engine and Open Graph metadata. -->
  <meta property="og:title" content="Lynchmark – LLM Benchmark">
  <meta property="og:site_name" content="Lynchmark">
  <meta name="description" content="Lynchmark tests LLMs by requiring correct CDN imports and library-specific implementations to solve challenging browser-based JavaScript tasks.">
  <meta property="og:description" content="Lynchmark tests LLMs by requiring correct CDN imports and library-specific implementations to solve challenging browser-based JavaScript tasks.">
  <meta property="og:type" content="website">
  <meta property="og:url" content="https://lynchmark.com/">
  <link rel="canonical" href="https://lynchmark.com/">
  <script type="application/ld+json">
{
"@context":"https://schema.org",
"@type":"WebSite",
"name":"Lynchmark",
"url":"https://lynchmark.com/",
"description":"Lynchmark – an automated benchmark for LLM coding abilities in a real browser+CDN environment."
}
  </script>

  <!-- Fonts. Open connections to both Google Fonts origins before the stylesheet is requested. -->
  <!-- NOTE(review): "DM Serif Display" is requested below but not referenced by any markup in this file; confirm it is still needed. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=DM+Serif+Display:ital@0;1&family=IBM+Plex+Mono:wght@400;500&display=swap" rel="stylesheet">
  <script src="https://cdn.tailwindcss.com"></script>
  <style>
    /* font-display:swap matches the display=swap used for the Google Fonts request: text stays visible while Stain loads. */
    @font-face{font-family:"Stain";src:url("https://cdn.jsdelivr.net/gh/multipleof4/stain.otf@master/dist/Stain.otf") format("opentype");font-display:swap}
    body{font-family:Inter,system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif}
    .mono{font-family:"IBM Plex Mono",ui-monospace,SFMono-Regular,Menlo,monospace}
  </style>

  <!-- Third-party counter/tracker scripts. Explicit https: instead of a protocol-relative URL. -->
  <script data-goatcounter="https://lynch.goatcounter.com/count" async src="https://gc.zgo.at/count.js"></script>
  <script defer src="https://c.planetrenox.com/tracker.js"></script>
</head>
<body class="bg-gray-50 text-gray-800">
<main class="max-w-2xl mx-auto flex flex-col min-h-screen p-6 lg:p-8">
<header class="text-center mb-10">
<!-- Page title with a "Last updated" pill absolutely positioned at its top-right corner.
     The <time> element is empty in the markup; the module script at the end of <body>
     writes its text and datetime attribute. -->
<div class="relative inline-block">
<h1 class="text-4xl font-bold text-gray-900 mb-2">Lynchmark</h1>
<span class="mono pointer-events-none absolute -top-2 -right-3 inline-flex items-center rounded-full border border-green-200 bg-green-50 text-green-700 text-xs leading-none font-medium px-2 py-1 shadow-sm">
Last updated <time id="last-updated" class="ml-1"></time>
</span>
</div>
<p class="text-base text-gray-600 max-w-lg mx-auto" style="font-family:Stain,sans-serif">
This benchmark tests a model's knowledge by tasking it with importing the right library from the right CDN URL path and using its pre-existing, library-specific knowledge to correctly implement a JavaScript solution to each challenging problem in the browser environment.
</p>
</header>
<!-- Empty in the markup: the module script at the end of <body> appends one <section> card per model here. -->
<div id="results-container" class="flex flex-col gap-6 flex-grow">
</div>
<!-- Link to the blog post. -->
<div class="mt-12 text-center space-y-2">
<a href="/blog/lynchmark-newsletter-experiment.html" class="block text-sm text-blue-500 hover:text-blue-700 font-medium mono">blog/lynchmark-newsletter-experiment</a>
</div>
<footer class="mt-10 flex justify-center">
  <!-- Repository link. It opens in a new tab, so a visually hidden note announces that to screen-reader users. -->
  <a
    href="https://github.com/multipleof4/lynchmark"
    class="inline-flex items-center gap-2 text-gray-600 hover:text-gray-900"
    target="_blank"
    rel="noopener noreferrer"
  >
    <!-- Decorative icon: hidden from the accessibility tree; the text span carries the link name. -->
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 16 16"
      aria-hidden="true"
      class="w-5 h-5 fill-current"
    >
      <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38
      0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52
      0-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95
      0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0
      1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15
      0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2
      0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
    </svg>
    <span class="mono text-xs font-medium">@multipleof4/lynchmark</span>
    <span class="sr-only">(opens in a new tab)</span>
  </a>
</footer>
</main>
<script type="module">
// Shorthand for document.getElementById.
const get=id=>document.getElementById(id);
// Element that receives one result card per model.
const container=get('results-container');
// <time> element inside the "Last updated" badge.
// NOTE(review): the badge shows the visitor's current date, not a commit/deploy date — confirm that is intended.
const updatedEl=get('last-updated');
const now=new Date();
// Human-readable label, e.g. "Jan 2025", in the visitor's local time zone.
updatedEl.textContent=now.toLocaleDateString('en-US',{month:'short',year:'numeric'});
// Build the machine-readable date from the same local-time fields as the label.
// toISOString() reports UTC, so around midnight it could name a different day (and, at a
// month boundary, a different month) than the text shown to the user.
const pad=n=>String(n).padStart(2,'0');
updatedEl.dateTime=`${now.getFullYear()}-${pad(now.getMonth()+1)}-${pad(now.getDate())}`;
// Letter-grade table as [minimum pass ratio, grade] rows, ordered from the highest floor
// to the lowest so the first matching row is the best grade earned.
const grades=[[.97,'A+'],[.93,'A'],[.9,'A-'],[.87,'B+'],[.83,'B'],[.8,'B-'],[.77,'C+'],[.73,'C'],[.7,'C-'],[.6,'D'],[0,'F']];
/**
 * Map a pass ratio to a letter grade.
 * @param {number} ratio Fraction of tests passed.
 * @returns {string} Grade of the first row whose floor is <= ratio. Falls back to 'F'
 *   when no row matches (NaN or a negative ratio) instead of throwing on an
 *   undefined lookup.
 */
const gradeOf=ratio=>grades.find(([floor])=>ratio>=floor)?.[1]??'F';
const run=async()=>{
const readme=await fetch('./README').then(r=>r.text());
const models=readme.match(/<!-- MODELS_START -->\n([\s\S]+?)\n<!-- MODELS_END -->/)[1].trim().split('\n');
const testsRes=await fetch('https://api.github.com/repos/multipleof4/lynchmark/contents/tests');
const testsData=await testsRes.json();
const tests=testsData.filter(d=>d.type==='dir').map(d=>d.name).sort((a,b)=>parseInt(a)-parseInt(b));
for(const model of models){
const sModel=model.replace(/[\/:]/g,'_');
const card=document.createElement('section');
card.className='rounded-2xl border border-gray-200 bg-white shadow-sm overflow-hidden';
card.innerHTML=`
<div class="bg-gray-50 px-5 py-3 border-b border-gray-200">
<p class="mono text-sm text-gray-700 font-medium">${model}</p>
</div>
<ul class="p-4 space-y-2" id="list-${sModel}"></ul>`;
container.appendChild(card);
const list=get(`list-${sModel}`);
let passed=0;
let ran=0;
for(const test of tests){
const li=document.createElement('li');
li.className='flex items-center gap-3 text-sm';
list.appendChild(li);
const outUrl=`./tests/${test}/outputs/${sModel}.js`;
const srcP=fetch(outUrl).then(r=>{
if(!r.ok) throw new Error('404');
return r.text();
}).catch(()=>null);
li.innerHTML=`<svg class="animate-spin h-4 w-4 text-gray-400" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24"><circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle><path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path></svg><span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">...</span>`;
const src=await srcP;
if(src===null){
li.innerHTML=`— <span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">N/A</span>`;
continue;
}
ran++;
const resMatch=src.match(/\/\/ Result: (PASS|FAIL)/);
const status=resMatch?(resMatch[1]==='PASS'?'✅':'❌'):'❓';
if(status==='✅')passed++;
const fTime=src.match(/\/\/ Generation time: ([\d\.]+)s/)?.[1];
const timeStr=fTime?`${parseFloat(fTime).toFixed(3)}s`:'N/A';
li.innerHTML=`${status} <span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">${timeStr}</span>`;
}
const ratio=ran?passed/ran:0;
const li=document.createElement('li');
li.className='mt-3 pt-3 border-t border-gray-200 flex items-center text-sm justify-between';
const grade=gradeOf(ratio);
li.innerHTML=`
<span class="text-gray-600">Score</span>
<span class="flex items-center gap-3">
<span class="mono text-gray-900 font-semibold">${passed}/${ran}</span>
<span class="inline-flex items-center rounded-full bg-gray-100 px-2 py-0.5 text-xs font-semibold text-gray-800">${grade}</span>
</span>`;
list.appendChild(li);
}
};
run();
</script>
</body>
</html>