From affb1d9a1e9dc2c86618b85e5af9d75eb39c5753 Mon Sep 17 00:00:00 2001
From: Martin Schrimpf
Date: Mon, 8 Jul 2024 15:52:03 +0200
Subject: [PATCH 1/7] first leaderboard table for benchmarking competition

---
 .../templates/benchmarks/benchmark_table.html | 108 ++++++++++++++++++
 .../templates/benchmarks/competition2024.html |  38 ++++++
 benchmarks/views/competition2024.py           |  87 +++++++++++++-
 3 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/templates/benchmarks/benchmark_table.html

diff --git a/benchmarks/templates/benchmarks/benchmark_table.html b/benchmarks/templates/benchmarks/benchmark_table.html
new file mode 100644
index 000000000..084298946
--- /dev/null
+++ b/benchmarks/templates/benchmarks/benchmark_table.html
@@ -0,0 +1,108 @@
+{% load static %}
+
+{% if models %}
+    {% if has_user %}
+
+ {% endif %} +
+ + + + + + {% for benchmark in benchmarks %} + + {% endfor %} + + + + + + + {% for score_row in model.scores %} + + {% endfor %} + + {% for model in models %} + + + {% for score_row in model.scores %} + + {% endfor %} + + {% endfor %} + +

+ + Model + +

+

+ {% if benchmark.benchmark_type.parent %} + {# indent, depending on depth #} + + {{ benchmark_parents|get_initial_characters:benchmark.identifier }} + + {% endif %} + + {# reference link #} + {% if benchmark.benchmark_type.reference and benchmark.benchmark_type.reference.url %} + + {% endif %} + + {# identifier #} + {{ benchmark.short_name|simplify_domain }} + + {% if benchmark.benchmark_type.reference and benchmark.benchmark_type.reference.url %} + + {% endif %} +

+
+ Average model scores +
+
+ {{ score_row.score_ceiled }} +
+ + + {{ score_row.score_ceiled }} +
+
+ +
+ Model scores on brain benchmarks. + Click on a model to see more details. + The more green and bright a cell, the better the model's score. + Scores are ceiled, hover the benchmark to see ceilings. +
+{% if has_user %} +
+{% endif %} +{% else %} +

No data.

+{% endif %} diff --git a/benchmarks/templates/benchmarks/competition2024.html b/benchmarks/templates/benchmarks/competition2024.html index 39f45a7c5..46b611d6d 100644 --- a/benchmarks/templates/benchmarks/competition2024.html +++ b/benchmarks/templates/benchmarks/competition2024.html @@ -130,6 +130,44 @@

Common critiques of the Brain-Score platform + {# leaderboard #} +
+

Competition Leaderboard

+ +
+
+
+
+

+ Behavioral Track +

+
+
+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_behavior_vision models=models_behavior_vision %} +
+
+
+
+ +
+
+
+

+ Neural Track +

+
+
+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_neural_vision models=models_neural_vision %} +
+
+
+
+
+
+ + {# overview #}

Overview

diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py
index 0f29fec53..3046ccce5 100644
--- a/benchmarks/views/competition2024.py
+++ b/benchmarks/views/competition2024.py
@@ -1,6 +1,91 @@
 from django.shortcuts import render
 
+from .index import get_context
+from ..models import User
+
 
 def view(request):
-    context = {}
+    # model filter
+    included_models = [
+        "cvt_cvt-w24-384-in22k_finetuned-in1k_4",
+        "resnext101_32x8d_wsl",
+        "effnetb1_cutmixpatch_SAM_",
+        "effnetb1_cutmixpatch_augmix_robust32_avge4e7_manylayers_324x288",
+        "resnext101_32x32d_wsl",
+        "effnetb1_272x240",
+        "resnext101_32x48d_wsl",
+        "pnasnet_large",
+        "resnet-152_v2",
+        "focalnet_tiny_lrf_in1k",
+        "hmax",
+        "alexnet",
+        "CORnet-S",
+        "resnet-50-robust",
+        "voneresnet-50-non_stochastic",
+        "resnet18-local_aggregation",
+        "grcnn_robust_v1",
+        "custom_model_cv_18_dagger_408",
+        "ViT_L_32_imagenet1k",
+        "mobilenet_v2_1.4_224",
+        "pixels",
+    ]
+    assert len(included_models) == 21
+    model_filter = dict(model__name__in=included_models)
+
+    # benchmark filter
+    track_benchmarks = {
+        "behavior_vision": [
+            "average_vision",
+            "behavior_vision",
+
+            "Hebart2023-match",
+
+            # "Baker2022",
+            # "Baker2022-accuracy_delta_frankenstein", "Baker2022-accuracy_delta_fragmented",
+            # "Baker2022-inverted_accuracy_delta",
+
+            "Coggan2024"
+            "Coggan2024_behavior-ConditionWiseAccuracySimilarity",
+
+            "BMD2024",
+            "BMD2024.texture_1Behavioral-accuracy_distance",
+            "BMD2024.texture_2Behavioral-accuracy_distance",
+            "BMD2024.dotted_1Behavioral-accuracy_distance",
+            "BMD2024.dotted_2Behavioral-accuracy_distance",
+
+            "Malania2007",
+            "Malania2007.short2-threshold_elevation", "Malania2007.short4-threshold_elevation",
+            "Malania2007.short6-threshold_elevation", "Malania2007.short8-threshold_elevation",
+            "Malania2007.short16-threshold_elevation", "Malania2007.equal2-threshold_elevation",
+            "Malania2007.long2-threshold_elevation", "Malania2007.equal16-threshold_elevation",
+            "Malania2007.long16-threshold_elevation", "Malania2007.vernieracuity-threshold",
+
+            "Maniquet2024",
+            "Maniquet2024-confusion_similarity", "Maniquet2024-tasks_consistency",
+        ],
+        "neural_vision": [
+            "average_vision",
+            "neural_vision",
+            "V1", "V2", "V4", "IT",
+            "Bracci2019.anteriorVTC-rdm",
+            "Coggan2024_fMRI.V1-rdm",
+            "Coggan2024_fMRI.V2-rdm",
+            "Coggan2024_fMRI.V4-rdm",
+            "Coggan2024_fMRI.IT-rdm",
+        ]
+    }
+    admin_user = User.objects.get(id=2)
+    context = {'leaderboard_keys': ['behavior_vision', 'neural_vision']}
+    for key, key_benchmarks in track_benchmarks.items():
+        if key == 'neural_vision': continue  # fixme
+        benchmark_filter = lambda benchmarks: benchmarks.filter(identifier__in=key_benchmarks)
+        key_context = get_context(benchmark_filter=benchmark_filter,
+                                  model_filter=model_filter,
+                                  user=admin_user,
+                                  domain="vision", show_public=True)
+        key_context[f"benchmarks_{key}"] = key_context['benchmarks']
+        key_context[f"models_{key}"] = key_context['models']
+        del key_context['benchmarks'], key_context['models']
+        context = {**context, **key_context}
+
     return render(request, 'benchmarks/competition2024.html', context)

From 08c9ccc8e7f6927a070c05afd996fb8f0f68d509 Mon Sep 17 00:00:00 2001
From: Martin Schrimpf
Date: Wed, 10 Jul 2024 09:27:15 +0200
Subject: [PATCH 2/7] contentize benchmark tables; fix identifiers

---
 .../templates/benchmarks/benchmark_table.html | 21 +--------
 .../templates/benchmarks/competition2024.html | 45 ++++++-------------
 benchmarks/views/competition2024.py           | 37 ++++++-------
 3 files changed, 35 insertions(+), 68 deletions(-)

diff --git a/benchmarks/templates/benchmarks/benchmark_table.html b/benchmarks/templates/benchmarks/benchmark_table.html
index 084298946..05e2bb1f6 100644
--- a/benchmarks/templates/benchmarks/benchmark_table.html
+++ b/benchmarks/templates/benchmarks/benchmark_table.html
@@ -21,7 +21,7 @@
                 title="ceiling: {{ benchmark.ceiling }}"
             {% endif %}
                 data-benchmark="{{ benchmark.short_name }}"
-                data-parent="{{ benchmark_parents|get_parent_item:benchmark.identifier }}"
+{#              data-parent="{{ benchmark_parents|get_parent_item:benchmark.identifier }}"#}
                 class="rotate depth_{{ benchmark.depth }}" >
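A note on the view() function introduced in PATCH 1 above: the entry "Coggan2024" in the behavior_vision list is missing a trailing comma, so Python concatenates it with the following literal into a single identifier; PATCH 2 replaces these entries with the corrected names. The per-track context assembly itself follows a simple pattern, roughly the sketch below (a simplified illustration, not part of the patch: the wrapper name build_track_contexts is made up, get_context is the existing helper imported from .index, and the filter and key-renaming semantics mirror the call shown above):

    # Sketch: build one leaderboard context per competition track and merge them
    # under track-specific keys so a single template can render both tables.
    def build_track_contexts(track_benchmarks, model_filter, admin_user, get_context):
        context = {'leaderboard_keys': list(track_benchmarks)}
        for key, key_benchmarks in track_benchmarks.items():
            # the callable is applied to the benchmark queryset, as the lambda in the
            # patch suggests; binding the identifiers as a default argument avoids
            # late-binding surprises if the callable were stored instead of used directly
            key_context = get_context(
                benchmark_filter=lambda qs, ids=tuple(key_benchmarks): qs.filter(identifier__in=ids),
                model_filter=model_filter, user=admin_user, domain="vision", show_public=True)
            # rename the generic keys so the two tracks do not overwrite each other
            context[f"benchmarks_{key}"] = key_context.pop('benchmarks')
            context[f"models_{key}"] = key_context.pop('models')
            context.update(key_context)
        return context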

@@ -53,27 +53,10 @@ - - -

- Average model scores -
- - {% for score_row in model.scores %} - - {{ score_row.score_ceiled }} - - {% endfor %} - {% for model in models %}
- {{ model|display_model:user }} @@ -82,7 +65,7 @@ {% for score_row in model.scores %} {{ score_row.score_ceiled }} diff --git a/benchmarks/templates/benchmarks/competition2024.html b/benchmarks/templates/benchmarks/competition2024.html index 46b611d6d..9f754e53f 100644 --- a/benchmarks/templates/benchmarks/competition2024.html +++ b/benchmarks/templates/benchmarks/competition2024.html @@ -134,36 +134,14 @@

Common critiques of the Brain-Score platform

Competition Leaderboard

-
-
-
-
-

- Behavioral Track -

-
-
-
- {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_behavior_vision models=models_behavior_vision %} -
-
-
-
+

Behavioral Track

+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_behavior_vision models=models_behavior_vision %} +
-
-
-
-

- Neural Track -

-
-
-
- {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_neural_vision models=models_neural_vision %} -
-
-
-
+

Neural Track

+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_neural_vision models=models_neural_vision %}
@@ -292,7 +270,8 @@

Models

effnetb1_cutmixpatch_SAM_
  • effnetb1_cutmixpatch_augmix_robust32_avge4e7_manylayers_324x288 - (Winner of the 2022 competition)
  • + (Winner of the 2022 competition) +
  • resnext101_32x32d_wsl
  • @@ -325,9 +304,11 @@

    Models

  • resnet18-local_aggregation
  • - grcnn_robust_v1 (Top-3 competition 2022)
  • + grcnn_robust_v1 (Top-3 competition 2022) +
  • - custom_model_cv_18_dagger_408 (Top-3 competition 2022)
  • + custom_model_cv_18_dagger_408 (Top-3 competition 2022) +
  • ViT_L_32_imagenet1k
  • diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 3046ccce5..dee715c1d 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -40,12 +40,11 @@ def view(request): "Hebart2023-match", - # "Baker2022", - # "Baker2022-accuracy_delta_frankenstein", "Baker2022-accuracy_delta_fragmented", - # "Baker2022-inverted_accuracy_delta", + "Baker2022", + "Baker2022inverted-accuracy_delta", "Baker2022fragmented-accuracy_delta", + "Baker2022frankenstein-accuracy_delta", - "Coggan2024" - "Coggan2024_behavior-ConditionWiseAccuracySimilarity", + "tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity", "BMD2024", "BMD2024.texture_1Behavioral-accuracy_distance", @@ -53,31 +52,35 @@ def view(request): "BMD2024.dotted_1Behavioral-accuracy_distance", "BMD2024.dotted_2Behavioral-accuracy_distance", - "Malania2007", - "Malania2007.short2-threshold_elevation", "Malania2007.short4-threshold_elevation", - "Malania2007.short6-threshold_elevation", "Malania2007.short8-threshold_elevation", - "Malania2007.short16-threshold_elevation", "Malania2007.equal2-threshold_elevation", - "Malania2007.long2-threshold_elevation", "Malania2007.equal16-threshold_elevation", - "Malania2007.long16-threshold_elevation", "Malania2007.vernieracuity-threshold", - "Maniquet2024", "Maniquet2024-confusion_similarity", "Maniquet2024-tasks_consistency", + + "Malania2007", + "Malania2007.short2", "Malania2007.short4", "Malania2007.short6", "Malania2007.short8", + "Malania2007.short16", "Malania2007.equal2", "Malania2007.long2", "Malania2007.equal16", + "Malania2007.long16", "Malania2007.vernieracuity-threshold", + + "Scialom2024", + "Scialom2024_phosphenes-allBehavioralAccuracyDistance", + "Scialom2024_segments-allBehavioralAccuracyDistance", + "Scialom2024_phosphenes-100BehavioralAccuracyDistance", + "Scialom2024_segments-100BehavioralAccuracyDistance", ], "neural_vision": [ "average_vision", "neural_vision", "V1", "V2", "V4", "IT", "Bracci2019.anteriorVTC-rdm", - "Coggan2024_fMRI.V1-rdm", - "Coggan2024_fMRI.V2-rdm", - "Coggan2024_fMRI.V4-rdm", - "Coggan2024_fMRI.IT-rdm", + "Coggan2024", + "tong.Coggan2024_fMRI.V1-rdm", + "tong.Coggan2024_fMRI.V2-rdm", + "tong.Coggan2024_fMRI.V4-rdm", + "tong.Coggan2024_fMRI.IT-rdm", ] } admin_user = User.objects.get(id=2) context = {'leaderboard_keys': ['behavior_vision', 'neural_vision']} for key, key_benchmarks in track_benchmarks.items(): - if key == 'neural_vision': continue # fixme benchmark_filter = lambda benchmarks: benchmarks.filter(identifier__in=key_benchmarks) key_context = get_context(benchmark_filter=benchmark_filter, model_filter=model_filter, From fb7d9be6fd2166b1cc345b8d0ee79ef795af100c Mon Sep 17 00:00:00 2001 From: Martin Schrimpf Date: Thu, 11 Jul 2024 10:11:58 +0200 Subject: [PATCH 3/7] update model and benchmark list --- benchmarks/views/competition2024.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index dee715c1d..779132c4b 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -14,22 +14,22 @@ def view(request): "resnext101_32x32d_wsl", "effnetb1_272x240", "resnext101_32x48d_wsl", - "pnasnet_large", - "resnet-152_v2", + "pnasnet_large_pytorch", + "resnet-152_v2_pytorch", "focalnet_tiny_lrf_in1k", "hmax", "alexnet", "CORnet-S", "resnet-50-robust", "voneresnet-50-non_stochastic", - "resnet18-local_aggregation", - "grcnn_robust_v1", + # 
"resnet18-local_aggregation", # TF no longer supported + # "grcnn_robust_v1", # weights deleted on user server "custom_model_cv_18_dagger_408", "ViT_L_32_imagenet1k", - "mobilenet_v2_1.4_224", + "mobilenet_v2_1.4_224_pytorch", "pixels", ] - assert len(included_models) == 21 + assert len(included_models) == 19 model_filter = dict(model__name__in=included_models) # benchmark filter @@ -61,10 +61,19 @@ def view(request): "Malania2007.long16", "Malania2007.vernieracuity-threshold", "Scialom2024", - "Scialom2024_phosphenes-allBehavioralAccuracyDistance", - "Scialom2024_segments-allBehavioralAccuracyDistance", - "Scialom2024_phosphenes-100BehavioralAccuracyDistance", - "Scialom2024_segments-100BehavioralAccuracyDistance", + "Scialom2024_rgb-behavioral_accuracy", + "Scialom2024_phosphenes-all-behavioral_accuracy", + "Scialom2024_segments-all-behavioral_accuracy", + "Scialom2024_phosphenes-100-behavioral_accuracy", + "Scialom2024_segments-100-behavioral_accuracy", + + "Ferguson2024", + "Ferguson2024circle_line-value_delta", "Ferguson2024color-value_delta", + "Ferguson2024convergence-value_delta", "Ferguson2024eighth-value_delta", + "Ferguson2024gray_easy-value_delta", "Ferguson2024gray_hard-value_delta", "Ferguson2024half-value_delta", + "Ferguson2024juncture-value_delta", "Ferguson2024lle-value_delta", "Ferguson2024llh-value_delta", + "Ferguson2024quarter-value_delta", "Ferguson2024round_f-value_delta", "Ferguson2024round_v-value_delta", + "Ferguson2024tilted_line-value_delta" ], "neural_vision": [ "average_vision", From b4393af140c562764fdb335b8c4b7693354a77df Mon Sep 17 00:00:00 2001 From: Martin Schrimpf Date: Thu, 11 Jul 2024 11:02:05 +0200 Subject: [PATCH 4/7] update mobilenet identifier --- benchmarks/views/competition2024.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 779132c4b..75c650c96 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -26,7 +26,7 @@ def view(request): # "grcnn_robust_v1", # weights deleted on user server "custom_model_cv_18_dagger_408", "ViT_L_32_imagenet1k", - "mobilenet_v2_1.4_224_pytorch", + "mobilenet_v2_1-4_224_pytorch", "pixels", ] assert len(included_models) == 19 From 46e4e1f3374b2d1b9263bc7f0dff02e5e176d712 Mon Sep 17 00:00:00 2001 From: Martin Schrimpf Date: Thu, 11 Jul 2024 15:32:17 +0200 Subject: [PATCH 5/7] fix effnet identifier; exclude internal submissions for now --- benchmarks/views/competition2024.py | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 75c650c96..8afc99889 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -9,7 +9,7 @@ def view(request): included_models = [ "cvt_cvt-w24-384-in22k_finetuned-in1k_4", "resnext101_32x8d_wsl", - "effnetb1_cutmixpatch_SAM_", + "effnetb1_cutmixpatch_SAM_robust32_avge6e8e9e10_manylayers_324x288", "effnetb1_cutmixpatch_augmix_robust32_avge4e7_manylayers_324x288", "resnext101_32x32d_wsl", "effnetb1_272x240", @@ -55,25 +55,25 @@ def view(request): "Maniquet2024", "Maniquet2024-confusion_similarity", "Maniquet2024-tasks_consistency", - "Malania2007", - "Malania2007.short2", "Malania2007.short4", "Malania2007.short6", "Malania2007.short8", - "Malania2007.short16", "Malania2007.equal2", "Malania2007.long2", "Malania2007.equal16", - "Malania2007.long16", "Malania2007.vernieracuity-threshold", - - 
"Scialom2024", - "Scialom2024_rgb-behavioral_accuracy", - "Scialom2024_phosphenes-all-behavioral_accuracy", - "Scialom2024_segments-all-behavioral_accuracy", - "Scialom2024_phosphenes-100-behavioral_accuracy", - "Scialom2024_segments-100-behavioral_accuracy", - - "Ferguson2024", - "Ferguson2024circle_line-value_delta", "Ferguson2024color-value_delta", - "Ferguson2024convergence-value_delta", "Ferguson2024eighth-value_delta", - "Ferguson2024gray_easy-value_delta", "Ferguson2024gray_hard-value_delta", "Ferguson2024half-value_delta", - "Ferguson2024juncture-value_delta", "Ferguson2024lle-value_delta", "Ferguson2024llh-value_delta", - "Ferguson2024quarter-value_delta", "Ferguson2024round_f-value_delta", "Ferguson2024round_v-value_delta", - "Ferguson2024tilted_line-value_delta" + # "Malania2007", + # "Malania2007.short2", "Malania2007.short4", "Malania2007.short6", "Malania2007.short8", + # "Malania2007.short16", "Malania2007.equal2", "Malania2007.long2", "Malania2007.equal16", + # "Malania2007.long16", "Malania2007.vernieracuity-threshold", + # + # "Scialom2024", + # "Scialom2024_rgb-behavioral_accuracy", + # "Scialom2024_phosphenes-all-behavioral_accuracy", + # "Scialom2024_segments-all-behavioral_accuracy", + # "Scialom2024_phosphenes-100-behavioral_accuracy", + # "Scialom2024_segments-100-behavioral_accuracy", + # + # "Ferguson2024", + # "Ferguson2024circle_line-value_delta", "Ferguson2024color-value_delta", + # "Ferguson2024convergence-value_delta", "Ferguson2024eighth-value_delta", + # "Ferguson2024gray_easy-value_delta", "Ferguson2024gray_hard-value_delta", "Ferguson2024half-value_delta", + # "Ferguson2024juncture-value_delta", "Ferguson2024lle-value_delta", "Ferguson2024llh-value_delta", + # "Ferguson2024quarter-value_delta", "Ferguson2024round_f-value_delta", "Ferguson2024round_v-value_delta", + # "Ferguson2024tilted_line-value_delta" ], "neural_vision": [ "average_vision", From d26fcff2ee99eb6f9bd9621aee0d253d506de106 Mon Sep 17 00:00:00 2001 From: Martin Schrimpf Date: Sat, 13 Jul 2024 08:22:32 +0200 Subject: [PATCH 6/7] use faulty Maniquet identifier --- benchmarks/views/competition2024.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 8afc99889..a7a95531e 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -53,7 +53,7 @@ def view(request): "BMD2024.dotted_2Behavioral-accuracy_distance", "Maniquet2024", - "Maniquet2024-confusion_similarity", "Maniquet2024-tasks_consistency", + "Maniquet2024-confusion_similarity'", "Maniquet2024-tasks_consistency", # "Malania2007", # "Malania2007.short2", "Malania2007.short4", "Malania2007.short6", "Malania2007.short8", From 983da2e0e4190efe90fb4ca5f7c21be7b86c74fa Mon Sep 17 00:00:00 2001 From: Martin Schrimpf Date: Sat, 13 Jul 2024 09:42:26 +0200 Subject: [PATCH 7/7] show benchmark averages --- .../templates/benchmarks/benchmark_table.html | 6 +- benchmarks/views/competition2024.py | 3 +- benchmarks/views/index.py | 92 ++++++++++++------- 3 files changed, 64 insertions(+), 37 deletions(-) diff --git a/benchmarks/templates/benchmarks/benchmark_table.html b/benchmarks/templates/benchmarks/benchmark_table.html index 05e2bb1f6..4dbe9ff0f 100644 --- a/benchmarks/templates/benchmarks/benchmark_table.html +++ b/benchmarks/templates/benchmarks/benchmark_table.html @@ -51,13 +51,15 @@ {% endfor %} - + {% for model in models %} diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 
a7a95531e..afff56589 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -94,7 +94,8 @@ def view(request): key_context = get_context(benchmark_filter=benchmark_filter, model_filter=model_filter, user=admin_user, - domain="vision", show_public=True) + domain="vision", show_public=True, + compute_benchmark_average=True) key_context[f"benchmarks_{key}"] = key_context['benchmarks'] key_context[f"models_{key}"] = key_context['models'] del key_context['benchmarks'], key_context['models'] diff --git a/benchmarks/views/index.py b/benchmarks/views/index.py index 678a3b32c..3d26b6b39 100644 --- a/benchmarks/views/index.py +++ b/benchmarks/views/index.py @@ -39,10 +39,12 @@ def view(request, domain: str): return render(request, 'benchmarks/leaderboard/leaderboard.html', context) -def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_filter=None, show_public=False): +def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_filter=None, show_public=False, + compute_benchmark_average: bool = False): benchmarks = _collect_benchmarks(domain, user_page=True if user is not None else False, benchmark_filter=benchmark_filter) - model_rows = _collect_models(domain, benchmarks, show_public, user, score_filter=model_filter) + model_rows = _collect_models(domain, benchmarks, show_public, user, score_filter=model_filter, + compute_benchmark_average=compute_benchmark_average) # to save vertical space, we strip the lab name in front of benchmarks. uniform_benchmarks = {} # keeps the original benchmark name @@ -77,35 +79,34 @@ def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_ if domain is "vision": citation_domain_url = 'https://www.biorxiv.org/content/early/2018/09/05/407007' citation_domain_title = "Brain-Score: Which Artificial Neural Network for Object Recognition is most " \ - "Brain-Like? " - citation_domain_bibtex = "@article{SchrimpfKubilius2018BrainScore,\n\t\t\t\t" \ - "title={Brain-Score: Which Artificial Neural Network for Object Recognition is most Brain-Like?},\n\t\t\t\t" \ - "author={Martin Schrimpf and Jonas Kubilius and Ha Hong and Najib J. Majaj and " \ - "Rishi Rajalingham and Elias B. Issa and Kohitij Kar and Pouya Bashivan and Jonathan " \ - "Prescott-Roy and Franziska Geiger and Kailyn Schmidt and Daniel L. K. Yamins and James J. DiCarlo},\n\t\t\t\t" \ - "journal={bioRxiv preprint},\n\t\t\t\t" \ - "year={2018},\n\t\t\t\t" \ - "url={https://www.biorxiv.org/content/10.1101/407007v2}\n\t\t\t}" + "Brain-Like?" + citation_domain_bibtex = "@article{SchrimpfKubilius2018BrainScore,\n" \ + "title={Brain-Score: Which Artificial Neural Network for Object Recognition is most Brain-Like?},\n" \ + "author={Martin Schrimpf and Jonas Kubilius and Ha Hong and Najib J. Majaj and " \ + "Rishi Rajalingham and Elias B. Issa and Kohitij Kar and Pouya Bashivan and Jonathan " \ + "Prescott-Roy and Franziska Geiger and Kailyn Schmidt and Daniel L. K. Yamins and James J. 
DiCarlo},\n" \ + "journal={bioRxiv preprint},\n" \ + "year={2018},\n" \ + "url={https://www.biorxiv.org/content/10.1101/407007v2}\n}" elif domain is "language": citation_domain_url = 'https://www.pnas.org/content/118/45/e2105646118' citation_domain_title = "The neural architecture of language: Integrative modeling converges on predictive processing" - citation_domain_bibtex = "@article{schrimpf2021neural,\n\t\t\t\t" \ - "title={The neural architecture of language: Integrative modeling converges on predictive processing},\n\t\t\t\t" \ - "author={Schrimpf, Martin and Blank, Idan Asher and Tuckute, Greta and Kauf, Carina " \ - "and Hosseini, Eghbal A and Kanwisher, Nancy and Tenenbaum, Joshua B and Fedorenko, Evelina},\n\t\t\t\t" \ - "journal={Proceedings of the National Academy of Sciences},\n\t\t\t\t" \ - "volume={118},\n\t\t\t\t" \ - "number={45},\n\t\t\t\t" \ - "pages={e2105646118},\n\t\t\t\t" \ - "year={2021},\n\t\t\t\t" \ - "publisher={National Acad Sciences}\n\t\t\t" \ - "}" + citation_domain_bibtex = "@article{schrimpf2021neural,\n" \ + "title={The neural architecture of language: Integrative modeling converges on predictive processing},\n" \ + "author={Schrimpf, Martin and Blank, Idan Asher and Tuckute, Greta and Kauf, Carina " \ + "and Hosseini, Eghbal A and Kanwisher, Nancy and Tenenbaum, Joshua B and Fedorenko, Evelina},\n" \ + "journal={Proceedings of the National Academy of Sciences},\n" \ + "volume={118},\n" \ + "number={45},\n" \ + "pages={e2105646118},\n" \ + "year={2021},\n" \ + "publisher={National Acad Sciences}\n" \ + "}" else: citation_domain_url = '' citation_domain_title = '' citation_domain_bibtex = '' - benchmark_names = [b.identifier for b in list(filter(lambda b: b.number_of_all_children == 0, benchmarks))] return {'domain': domain, 'models': model_rows, 'benchmarks': benchmarks, 'benchmark_names': benchmark_names, @@ -115,14 +116,14 @@ def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_ "comparison_data": json.dumps(comparison_data), 'citation_general_url': 'https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X', 'citation_general_title': 'Integrative Benchmarking to Advance Neurally Mechanistic Models of Human Intelligence', - 'citation_general_bibtex': '@article{Schrimpf2020integrative,\n\t\t\t\t' + 'citation_general_bibtex': '@article{Schrimpf2020integrative,\n' 'title={Integrative Benchmarking to Advance ' - 'Neurally Mechanistic Models of Human Intelligence},\n\t\t\t\t' + 'Neurally Mechanistic Models of Human Intelligence},\n' 'author={Schrimpf, Martin and Kubilius, Jonas and Lee, Michael J and Murty, ' - 'N Apurva Ratan and Ajemian, Robert and DiCarlo, James J},\n\t\t\t\t' - 'journal={Neuron},\n\t\t\t\t' - 'year={2020},\n\t\t\t\t' - 'url={https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X}\n\t\t\t}', + 'N Apurva Ratan and Ajemian, Robert and DiCarlo, James J},\n' + 'journal={Neuron},\n' + 'year={2020},\n' + 'url={https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X}\n}', 'citation_domain_url': citation_domain_url, 'citation_domain_title': citation_domain_title, 'citation_domain_bibtex': citation_domain_bibtex, @@ -236,7 +237,8 @@ def _collect_submittable_benchmarks(benchmarks, user): return benchmark_selection -def _collect_models(domain: str, benchmarks, show_public, user=None, score_filter=None): +def _collect_models(domain: str, benchmarks, show_public, user=None, score_filter=None, + compute_benchmark_average: bool = False): """ :param user: The user whose profile we are currently on, if any """ @@ -388,7 
+390,7 @@ def _collect_models(domain: str, benchmarks, show_public, user=None, score_filte color=representative_color(None, min_value=0, max_value=1), comment="") # - convert scores DataFrame into rows - data = [] + model_rows = [] for model_id, group in tqdm(scores.groupby('model'), desc='model rows'): model_scores = {} # fill in computed scores @@ -444,10 +446,31 @@ def _collect_models(domain: str, benchmarks, show_public, user=None, score_filte scores=model_scores, rank=rank, build_status=build_status, submitter=submitter, submission_id=submission_id, jenkins_id=jenkins_id, timestamp=timestamp ) - data.append(model_row) - data = list(sorted(data, key=lambda model_row: model_row.rank)) + model_rows.append(model_row) + + model_rows = list(sorted(model_rows, key=lambda model_row: model_row.rank)) + + if compute_benchmark_average: + benchmark_averages = scores.fillna(0).groupby('benchmark').mean() + model_scores = [ScoreDisplay( + benchmark=benchmark.identifier, + versioned_benchmark_identifier=benchmark_averages.loc[benchmark.identifier]['benchmark_version'], + score_ceiled=represent(benchmark_averages.loc[benchmark.identifier]['score_ceiled']), + score_raw=benchmark_averages.loc[benchmark.identifier]['score_raw'], + error=None, + color='gray', comment=None) + for benchmark in benchmarks] + average_row = ModelRow( + id=None, + name="Benchmark average", + reference_identifier=None, reference_link=None, + user=None, public=True, competition=None, domain=domain, + scores=model_scores, rank=None, build_status=None, + submitter=None, submission_id=None, jenkins_id=None, timestamp=None + ) + model_rows.insert(0, average_row) - return data + return model_rows def _get_benchmark_shortname(benchmark_type_identifier: str): @@ -554,6 +577,7 @@ def get_visibility(model, user): else: return "public" + # Adds python functions so the HTML can do several things @register.filter def get_item(dictionary, key):
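PATCH 7's benchmark-average row boils down to a per-benchmark mean over the displayed models, with missing scores treated as zero, and the resulting row is inserted at position 0 of model_rows so it renders above the individual models. A rough standalone sketch of that averaging step (assuming a pandas frame with one row per model-benchmark pair and a numeric score_ceiled column; the column names follow the patch, the frame contents and helper name are made-up examples):

    import pandas as pd

    # one row per (model, benchmark) pair; NaN means the model has no score yet
    scores = pd.DataFrame({
        'model': ['alexnet', 'alexnet', 'CORnet-S', 'CORnet-S'],
        'benchmark': ['V1', 'IT', 'V1', 'IT'],
        'score_ceiled': [0.35, None, 0.42, 0.51],
    })

    def benchmark_averages(scores: pd.DataFrame) -> pd.Series:
        """Mean ceiled score per benchmark across all displayed models (NaN counted as 0)."""
        filled = scores.fillna({'score_ceiled': 0})
        return filled.groupby('benchmark')['score_ceiled'].mean()

    print(benchmark_averages(scores))
    # benchmark
    # IT    0.255
    # V1    0.385
    # Name: score_ceiled, dtype: float64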