diff --git a/components/filter-sidebar.tsx b/components/filter-sidebar.tsx
new file mode 100644
index 0000000..a7bc554
--- /dev/null
+++ b/components/filter-sidebar.tsx
@@ -0,0 +1,261 @@
+'use client'
+
+import { useMemo, useState } from 'react'
+import { Search, X, Filter, ChevronDown, ChevronRight } from 'lucide-react'
+import type { LeaderboardEntry, BenchmarkVersion } from '@/lib/types'
+import { PROVIDER_COLORS } from '@/lib/types'
+import { VersionSelector } from '@/components/version-selector'
+import { Checkbox } from '@/components/ui/checkbox'
+import {
+ Sidebar,
+ SidebarContent,
+ SidebarHeader,
+ SidebarGroup,
+ SidebarGroupContent,
+ SidebarInput,
+ SidebarSeparator,
+ SidebarFooter,
+} from '@/components/ui/sidebar'
+
+interface FilterSidebarProps { // Props accepted by FilterSidebar; filter values come in as props and changes go out through the on* callbacks.
+ entries: LeaderboardEntry[] // Source of the provider list: each entry's `provider` is lower-cased and counted.
+ versions: BenchmarkVersion[] // NOTE(review): presumably forwarded to VersionSelector — the markup using it is not visible here; confirm.
+ currentVersion: string | null // NOTE(review): presumably the selected benchmark version, null when none is chosen — confirm against caller.
+ officialOnly: boolean // When false, contributes 1 to the active-filter count.
+ openWeightsOnly: boolean // When true, contributes 1 to the active-filter count.
+ providerFilters: string[] // Selected providers; matched case-insensitively against provider names, one active filter each.
+ lastUpdated: string // Rendered verbatim after the word "Updated".
+ onOfficialOnlyChange: (officialOnly: boolean) => void // Receives the new official-only value.
+ onOpenWeightsOnlyChange: (openWeightsOnly: boolean) => void // Receives the new open-weights-only value.
+ onProviderToggle: (provider: string) => void // NOTE(review): call sites are not visible in this view; presumably receives the provider being toggled — confirm.
+ onClearProviders: () => void // NOTE(review): presumably clears only the provider selection — confirm against caller.
+ onClearAll: () => void // NOTE(review): presumably resets every filter in this sidebar — confirm against caller.
+}
+
+function CollapsibleGroup({ // Sidebar section that tracks its own open/closed state.
+ label, // NOTE(review): presumably the section heading — the markup that renders it is not visible here; confirm.
+ defaultOpen = true, // Sections start open unless the caller passes false.
+ children,
+}: {
+ label: string
+ defaultOpen?: boolean
+ children: React.ReactNode
+}) {
+ const [open, setOpen] = useState(defaultOpen) // defaultOpen only seeds the state; React ignores later changes to the initial value.
+
+ return (
+
+
+ {open && {children}}
+
+ )
+}
+
+/**
+ * Sidebar with the leaderboard filters: a "Filters" header with the active-filter
+ * count, a searchable provider checkbox list, and an "Updated" footer.
+ *
+ * The provider list is derived from `entries` (lower-cased names, most entries
+ * first). The only local state is the provider search text; every filter value
+ * arrives through props.
+ */
+export function FilterSidebar({
+ entries,
+ versions,
+ currentVersion,
+ officialOnly,
+ openWeightsOnly,
+ providerFilters,
+ lastUpdated,
+ onOfficialOnlyChange,
+ onOpenWeightsOnlyChange,
+ onProviderToggle,
+ onClearProviders,
+ onClearAll,
+}: FilterSidebarProps) {
+ const [providerSearch, setProviderSearch] = useState('')
+
+ // Extract unique providers from entries, sorted by count
+ const providers = useMemo(() => {
+ // Keyed by lower-cased provider name so differently-cased spellings share one row.
+ const counts = new Map<string, number>()
+ for (const entry of entries) {
+ const p = entry.provider.toLowerCase()
+ counts.set(p, (counts.get(p) ?? 0) + 1)
+ }
+ return Array.from(counts.entries())
+ .sort((a, b) => b[1] - a[1])
+ .map(([name, count]) => ({ name, displayName: name.charAt(0).toUpperCase() + name.slice(1), count }))
+ }, [entries])
+
+ const filteredProviders = useMemo(() => {
+ // Ignore surrounding whitespace so a stray space does not hide every provider.
+ const q = providerSearch.trim().toLowerCase()
+ if (!q) return providers
+ return providers.filter((p) => p.name.includes(q))
+ }, [providers, providerSearch])
+
+ // Badge number: officialOnly switched off counts as one, openWeightsOnly switched on
+ // counts as one, plus one per selected provider.
+ const activeFilterCount = useMemo(() => {
+ let count = 0
+ if (!officialOnly) count++
+ if (openWeightsOnly) count++
+ count += providerFilters.length
+ return count
+ }, [officialOnly, openWeightsOnly, providerFilters])
+
+ return (
+
+
+
+
+
+ Filters
+ {activeFilterCount > 0 && (
+
+ {activeFilterCount}
+
+ )}
+
+ {activeFilterCount > 0 && (
+
+ )}
+
+
+
+
+ {/* Data Quality */}
+
+
+
+
+
+
+
+
+
+ {/* Version */}
+
+
+
+
+
+
+
+
+ {/* Providers */}
+
+
+ {/* Search input */}
+
+
+ setProviderSearch(e.target.value)}
+ className="h-8 pl-7 pr-7 text-xs"
+ />
+ {providerSearch && (
+
+ )}
+
+
+ {/* Active provider filters badges */}
+ {providerFilters.length > 0 && (
+
+ {providerFilters.map((p) => {
+ const display = providers.find(pr => pr.name === p.toLowerCase())?.displayName || (p.charAt(0).toUpperCase() + p.slice(1))
+ return (
+
+
+ {display}
+
+
+ )
+ })}
+
+ )}
+
+ {/* Provider checkbox list */}
+
+ {filteredProviders.map((provider) => {
+ const isActive = providerFilters.some(p => p.toLowerCase() === provider.name)
+ const color = PROVIDER_COLORS[provider.name] || '#666'
+ return (
+
+ )
+ })}
+ {filteredProviders.length === 0 && (
+
+ No providers match "{providerSearch}"
+
+ )}
+
+
+
+
+
+
+
+ Updated {lastUpdated}
+
+
+
+ )
+}
diff --git a/components/leaderboard-header.tsx b/components/leaderboard-header.tsx
index 8ab93e5..94ad62b 100644
--- a/components/leaderboard-header.tsx
+++ b/components/leaderboard-header.tsx
@@ -1,9 +1,9 @@
'use client'
import Link from 'next/link'
-import type { BenchmarkVersion, LeaderboardEntry } from '@/lib/types'
-import { VersionSelector } from '@/components/version-selector'
+import type { LeaderboardEntry } from '@/lib/types'
import { ModelSearch } from '@/components/model-search'
+import { SidebarTrigger } from '@/components/ui/sidebar'
type ViewMode = 'success' | 'speed' | 'cost' | 'value' | 'graphs'
type ScoreMode = 'best' | 'average'
@@ -12,21 +12,13 @@ interface LeaderboardHeaderProps {
entries: LeaderboardEntry[]
filteredEntryCount: number
totalRuns: number
- versions: BenchmarkVersion[]
currentVersion: string | null
- lastUpdated: string
- providerFilter: string | null
- providerColor?: string
+ officialOnly: boolean
view: ViewMode
scoreMode: ScoreMode
- officialOnly: boolean
- openWeightsOnly: boolean
modelSearchValue: string
onViewChange: (view: ViewMode) => void
onScoreModeChange: (mode: ScoreMode) => void
- onOfficialOnlyChange: (officialOnly: boolean) => void
- onOpenWeightsOnlyChange: (openWeightsOnly: boolean) => void
- onClearProviderFilter: () => void
onModelSearchChange: (value: string) => void
}
@@ -34,37 +26,28 @@ export function LeaderboardHeader({
entries,
filteredEntryCount,
totalRuns,
- versions,
currentVersion,
- lastUpdated,
- providerFilter,
- providerColor,
+ officialOnly,
view,
scoreMode,
- officialOnly,
- openWeightsOnly,
modelSearchValue,
onViewChange,
onScoreModeChange,
- onOfficialOnlyChange,
- onOpenWeightsOnlyChange,
- onClearProviderFilter,
onModelSearchChange,
}: LeaderboardHeaderProps) {
return (
-
-
-
-
-
-

+
- {/* Navigation buttons - 2x3 grid on mobile, inline on desktop */}
-
+ {/* View mode buttons */}
+
-
-
-
- Updated {lastUpdated}
-
)
diff --git a/components/leaderboard-view.tsx b/components/leaderboard-view.tsx
index e8ccb9a..c5abd32 100644
--- a/components/leaderboard-view.tsx
+++ b/components/leaderboard-view.tsx
@@ -3,14 +3,18 @@
import { useCallback, useMemo, useState } from 'react'
import { useSearchParams, useRouter, usePathname } from 'next/navigation'
import type { LeaderboardEntry, BenchmarkVersion } from '@/lib/types'
-import { PROVIDER_COLORS } from '@/lib/types'
+
import { SimpleLeaderboard } from '@/components/simple-leaderboard'
import { ScatterGraphs } from '@/components/scatter-graphs'
import { TaskHeatmap } from '@/components/task-heatmap'
import { ScoreDistribution } from '@/components/score-distribution'
import { ModelRadar } from '@/components/model-radar'
import { LeaderboardHeader } from '@/components/leaderboard-header'
+import { FilterSidebar } from '@/components/filter-sidebar'
import { KiloClawAdCard } from '@/components/kiloclaw-ad-card'
+import { SidebarProvider, SidebarInset } from '@/components/ui/sidebar'
+import { TopBanner } from '@/components/top-banner'
+
type ViewMode = 'success' | 'speed' | 'cost' | 'value' | 'graphs'
type ScoreMode = 'best' | 'average'
@@ -46,7 +50,7 @@ export function LeaderboardView({ entries, lastUpdated, versions, currentVersion
const initialScoreMode = VALID_SCORE_MODES.includes(searchParams.get('score') as ScoreMode)
? (searchParams.get('score') as ScoreMode)
: 'best'
- const initialProvider = searchParams.get('provider') || null
+ const initialProviders = searchParams.get('provider')?.split(',').filter(Boolean).map(p => p.toLowerCase()) || []
const initialOpenWeights = searchParams.get('weights') === 'open'
const initialGraphTab = VALID_GRAPH_TABS.includes(searchParams.get('graph') as GraphSubTab)
? (searchParams.get('graph') as GraphSubTab)
@@ -56,7 +60,7 @@ export function LeaderboardView({ entries, lastUpdated, versions, currentVersion
const [view, setViewState] = useState
(initialView)
const [officialOnlyState, setOfficialOnlyState] = useState(officialOnly)
const [scoreMode, setScoreModeState] = useState(initialScoreMode)
- const [providerFilter, setProviderFilterState] = useState(initialProvider)
+ const [providerFilters, setProviderFiltersState] = useState(initialProviders)
const [openWeightsOnly, setOpenWeightsOnlyState] = useState(initialOpenWeights)
const [graphSubTab, setGraphSubTabState] = useState(initialGraphTab)
const [modelSearch, setModelSearchState] = useState(initialModelSearch)
@@ -88,9 +92,31 @@ export function LeaderboardView({ entries, lastUpdated, versions, currentVersion
updateUrl({ score: m })
}, [updateUrl])
- const setProviderFilter = useCallback((p: string | null) => {
- setProviderFilterState(p)
- updateUrl({ provider: p })
+ // Flip one provider in the multi-select filter and mirror the new selection into the URL.
+ const toggleProviderFilter = useCallback((p: string) => {
+ const key = p.toLowerCase()
+ // Removing the key first shows whether it was selected: an unchanged length means it was not.
+ const withoutKey = providerFilters.filter(item => item !== key)
+ const next = withoutKey.length === providerFilters.length ? [...providerFilters, key] : withoutKey
+
+ setProviderFiltersState(next)
+ updateUrl({ provider: next.length > 0 ? next.join(',') : null })
+ }, [providerFilters, updateUrl])
+
+ const clearProviderFilters = useCallback(() => { // Deselect every provider, leaving the other filters as they are.
+ setProviderFiltersState([])
+ updateUrl({ provider: null }) // NOTE(review): null presumably removes the `provider` query param — updateUrl is defined outside this hunk; confirm.
+ }, [updateUrl])
+
+ const clearAllFilters = useCallback(() => { // Reset three filters at once: official-only on, open-weights-only off, no providers selected.
+ setOfficialOnlyState(true)
+ setOpenWeightsOnlyState(false)
+ setProviderFiltersState([])
+ updateUrl({ // Single call covering all three params. NOTE(review): null presumably drops each param from the URL — confirm in updateUrl.
+ official: null,
+ weights: null,
+ provider: null
+ })
}, [updateUrl])
const setOpenWeightsOnly = useCallback((v: boolean) => {
@@ -125,98 +151,112 @@ export function LeaderboardView({ entries, lastUpdated, versions, currentVersion
const filteredEntries = useMemo(() => {
return entries.filter(entry => {
- if (providerFilter && entry.provider.toLowerCase() !== providerFilter.toLowerCase()) return false
+ if (providerFilters.length > 0 && !providerFilters.some(p => p.toLowerCase() === entry.provider.toLowerCase())) return false
if (openWeightsOnly && entry.weights !== 'Open') return false
if (modelSearch && !entry.model.toLowerCase().includes(modelSearch.toLowerCase())) return false
return true
})
- }, [entries, providerFilter, openWeightsOnly, modelSearch])
+ }, [entries, providerFilters, openWeightsOnly, modelSearch])
- const providerColor = providerFilter
- ? PROVIDER_COLORS[providerFilter.toLowerCase()] || '#666'
- : undefined
-
- const totalRuns = entries.reduce((acc, entry) => acc + (entry.submission_count || 1), 0)
+ // Sum submission counts over the filtered entries; a missing or zero count contributes 1.
+ const totalRuns = useMemo(() => {
+ let runs = 0
+ for (const entry of filteredEntries) {
+ runs += entry.submission_count || 1
+ }
+ return runs
+ }, [filteredEntries])
return (
-
-
+ setProviderFilter(null)}
- onModelSearchChange={handleModelSearchChange}
- modelSearchValue={modelSearch}
+ onProviderToggle={toggleProviderFilter}
+ onClearProviders={clearProviderFilters}
+ onClearAll={clearAllFilters}
/>
+
+
+
-
- {view === 'graphs' ? (
-
- {/* Graph sub-tabs */}
-
- {([
- ['scatter', 'Scatter Plots'],
- ['heatmap', 'Task Heatmap'],
- ['distribution', 'Score Distribution'],
- ['radar', 'Model Comparison'],
- ] as const).map(([tab, label]) => (
-
- ))}
+
+ {!officialOnlyState && (
+
+ Showing official + unofficial results
+ )}
- {graphSubTab === 'scatter' && (
-
- )}
- {graphSubTab === 'heatmap' && (
-
- )}
- {graphSubTab === 'distribution' && (
-
- )}
- {graphSubTab === 'radar' && (
-
- )}
-
-
-
- ) : (
-
- )}
-
-
+ {view === 'graphs' ? (
+
+ {/* Graph sub-tabs */}
+
+ {([
+ ['scatter', 'Scatter Plots'],
+ ['heatmap', 'Task Heatmap'],
+ ['distribution', 'Score Distribution'],
+ ['radar', 'Model Comparison'],
+ ] as const).map(([tab, label]) => (
+
+ ))}
+
+
+ {graphSubTab === 'scatter' && (
+
+ )}
+ {graphSubTab === 'heatmap' && (
+
+ )}
+ {graphSubTab === 'distribution' && (
+
+ )}
+ {graphSubTab === 'radar' && (
+
+ )}
+
+
+
+ ) : (
+
+ )}
+
+
+
)
}
diff --git a/components/task-heatmap.tsx b/components/task-heatmap.tsx
index 99258f6..144cea6 100644
--- a/components/task-heatmap.tsx
+++ b/components/task-heatmap.tsx
@@ -10,6 +10,7 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/comp
interface TaskHeatmapProps {
entries: LeaderboardEntry[]
+ scoreMode: 'best' | 'average' // NOTE(review): new required prop — confirm every TaskHeatmap call site passes it; how the component uses it is not visible in this hunk.
selectedCategories: string[]
onCategoriesChange: (categories: string[]) => void
}
@@ -38,7 +39,7 @@ function getScoreTextColor(ratio: number): string {
return 'hsl(0, 70%, 75%)'
}
-export function TaskHeatmap({ entries, selectedCategories, onCategoriesChange }: TaskHeatmapProps) {
+export function TaskHeatmap({ entries, scoreMode, selectedCategories, onCategoriesChange }: TaskHeatmapProps) {
const [modelData, setModelData] = useState([])
const [loading, setLoading] = useState(true)
const [error, setError] = useState(null)
diff --git a/lib/mock-data.ts b/lib/mock-data.ts
index 9e50464..3ce0698 100644
--- a/lib/mock-data.ts
+++ b/lib/mock-data.ts
@@ -50,6 +50,7 @@ export const mockSubmissions: Record = {
provider: "anthropic",
timestamp: "2026-02-10T15:30:00Z",
openclaw_version: "2.1.0",
+ benchmark_version: "v1.0.0",
total_score: 9.2,
max_score: 10.0,
metadata: {