From e2404b68aa133f732cb382af29f3deefb7400407 Mon Sep 17 00:00:00 2001 From: Carol Jiang Date: Tue, 7 Nov 2023 11:39:19 -0500 Subject: [PATCH 01/12] test index.html --- docs2/Makefile | 20 + docs2/requirements.txt | 5 + docs2/source/conf.py | 83 ++++ docs2/source/document_plugins.py | 146 +++++++ docs2/source/examples.md | 1 + docs2/source/index.rst | 28 ++ ...brainscore_language.artificial_subject.rst | 29 ++ .../brainscore_language.benchmark_helpers.rst | 31 ++ ...score_language.model_helpers.container.rst | 29 ++ ...score_language.model_helpers.embedding.rst | 38 ++ ...ore_language.model_helpers.huggingface.rst | 29 ++ ...e_language.model_helpers.preprocessing.rst | 29 ++ .../brainscore_language.model_helpers.rst | 34 ++ .../_autosummary/brainscore_language.rst | 55 +++ .../brainscore_language.submission.config.rst | 29 ++ .../brainscore_language.submission.rst | 32 ++ .../brainscore_language.utils.ceiling.rst | 29 ++ .../brainscore_language.utils.rst | 41 ++ .../brainscore_language.utils.s3.rst | 30 ++ ...inscore_language.utils.transformations.rst | 43 +++ .../brainscore_language.utils.xarray.rst | 31 ++ docs2/source/modules/api_reference.rst | 10 + docs2/source/modules/artificial_subject.rst | 9 + docs2/source/modules/benchmark_tutorial.rst | 364 ++++++++++++++++++ docs2/source/modules/model_tutorial.rst | 93 +++++ docs2/source/modules/plugins.rst | 106 +++++ 26 files changed, 1374 insertions(+) create mode 100644 docs2/Makefile create mode 100644 docs2/requirements.txt create mode 100644 docs2/source/conf.py create mode 100644 docs2/source/document_plugins.py create mode 100644 docs2/source/examples.md create mode 100644 docs2/source/index.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.submission.config.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.submission.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst create mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst create mode 100644 docs2/source/modules/api_reference.rst create mode 100644 docs2/source/modules/artificial_subject.rst create mode 100644 docs2/source/modules/benchmark_tutorial.rst create mode 100644 docs2/source/modules/model_tutorial.rst create mode 100644 docs2/source/modules/plugins.rst diff --git a/docs2/Makefile b/docs2/Makefile new file mode 100644 index 00000000..92dd33a1 --- /dev/null +++ b/docs2/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx 
documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs2/requirements.txt b/docs2/requirements.txt new file mode 100644 index 00000000..b896045d --- /dev/null +++ b/docs2/requirements.txt @@ -0,0 +1,5 @@ +-e . + +Sphinx>=4 +sphinx_rtd_theme +sphinxcontrib-bibtex diff --git a/docs2/source/conf.py b/docs2/source/conf.py new file mode 100644 index 00000000..f6e384e5 --- /dev/null +++ b/docs2/source/conf.py @@ -0,0 +1,83 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + +# -- Project information ----------------------------------------------------- + +project = 'Brain-Score Language' +copyright = '2022, Brain-Score Team' +author = 'Brain-Score Team' + +# The full version, including alpha/beta/rc tags +release = '0.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'recommonmark', + 'sphinx.ext.viewcode', + 'sphinx_rtd_theme', + 'sphinxcontrib.bibtex' +] +autosummary_generate = True # Turn on sphinx.ext.autosummary + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +source_suffix = ['.rst', '.md'] + +# The master toctree document. +master_doc = 'index' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# allows BibTeX citations to be inserted into docs +bibtex_bibfiles = ['bibtex/refs.bib'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static']
+
+
+def skip(app, what, name, obj, would_skip, options):
+    if name in ("__init__", "__call__"):
+        return False
+    if name.startswith('test') or name.startswith('Test'):
+        return False
+    return would_skip
+
+
+def setup(app):
+    app.connect("autodoc-skip-member", skip)
diff --git a/docs2/source/document_plugins.py b/docs2/source/document_plugins.py
new file mode 100644
index 00000000..c51016a0
--- /dev/null
+++ b/docs2/source/document_plugins.py
@@ -0,0 +1,146 @@
+""" Make plugin details available to readthedocs """
+
+import json
+import logging
+from pathlib import Path
+import re
+from rstcloth import RstCloth
+from typing import Dict, List, Union
+
+BIBS_DIR = 'docs/source/bibtex/'
+PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst'
+PLUGINS_LIST_FP = 'brainscore_language/plugin_management/all_plugins.json'
+PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models']
+PLUGIN_TYPE_MAP = {plugin_dirtype: plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS}
+
+
+def _get_module_plugin_names(plugin_type: str, plugin_dir: Path) -> List[str]:
+    """ Returns list of plugins registered by module """
+    init_fp = plugin_dir / "__init__.py"
+    registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry"
+
+    with open(init_fp, 'r') as f:
+        text = f.read()
+    registered_plugins = re.findall(registry + r'\[(.*)\]', text)  # raw string avoids invalid-escape warnings
+    cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins]
+
+    return cleaned_plugin_names
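+# Example (hypothetical plugin, for illustration only): if a plugin's __init__.py contains
+#     benchmark_registry['Futrell2018-pearsonr'] = Futrell2018Pearsonr
+# then _get_module_plugin_names('benchmarks', plugin_dir) matches the
+# benchmark_registry[...] assignment above and returns ['Futrell2018-pearsonr'].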
+
+def _id_from_bibtex(bibtex: str) -> str:
+    """ Returns BibTeX identifier from BibTeX """
+    return re.search(r'\{(.*?),', bibtex).group(1)
+
+def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]:
+    """Add all plugins to respective type registries
+
+    Returns a dict where key is plugin type,
+    value is a dict where key is name of plugin dir,
+    value is a dict of plugin info:
+
+        plugin_names: list of names of all plugins registered by module
+        bibtex: a BibTeX string
+        bibtex_id: BibTeX identifier
+    """
+    all_plugin_info = {}
+    for plugin_type in PLUGIN_DIRS:
+        plugins_dir = Path(Path(__file__).parents[1], plugin_type)
+        for plugin_dir in plugins_dir.glob('[!._]*'):
+            plugin_dirname = str(plugin_dir)
+
+            if plugin_type not in all_plugin_info:
+                all_plugin_info[plugin_type] = {plugin_dirname: {}}
+            else:
+                all_plugin_info[plugin_type].update({plugin_dirname: {}})
+
+            plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname]
+
+            plugin_names = _get_module_plugin_names(plugin_type, plugin_dir)
+            plugin_module_path = plugin_dirname.replace('/', '.')
+            plugin_module = __import__(plugin_module_path, fromlist=['BIBTEX'])
+
+            plugin_dir_dict['plugin_names'] = plugin_names
+            if hasattr(plugin_module, 'BIBTEX'):
+                plugin_dir_dict['bibtex'] = plugin_module.BIBTEX
+                plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX)
+
+    return all_plugin_info
+
+def _remove_duplicate_bibs(plugins_with_bibtex: Dict[str, Dict]) -> List[str]:
+    """ Returns list of unique BibTeX to add """
+    bibtex_data = {v['bibtex_id']: v['bibtex'] for v in plugins_with_bibtex.values()}
+    alphabetized_bibtex = dict(sorted(bibtex_data.items()))
+    deduped_bibtex = list(alphabetized_bibtex.values())
+
+    return deduped_bibtex
+
+def _record_bibtex(bibtex_to_add: List[str], plugins_bib_fp: str):
+    """ insert new BibTeX into respective .bib files """
+    if not Path(BIBS_DIR).exists():
+        Path(BIBS_DIR).mkdir(parents=True)
+    with open(plugins_bib_fp, "w+") as f:
+        for bibtex in bibtex_to_add:
+            f.write(bibtex)
+            f.write('\n')
+
+def create_bibfile(plugins: Dict[str, Dict], plugin_type='refs'):
+    """ For all plugins, add bibtex (if present) to .bib files """
+    if plugin_type == 'refs':
+        plugins = dict(ele for sub in plugins.values() for ele in sub.items())
+    # drop plugins without bibtex
+    plugins_with_bibtex = {k: v for k, v in plugins.items() if 'bibtex' in v.keys()}
+    if len(plugins_with_bibtex.keys()) > 0:
+        plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib')
+        bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex)
+        _record_bibtex(bibtex_to_add, plugins_bib_fp)
+
+def _prepare_content(all_plugin_info: Dict[str, Dict]) -> Dict[str, Dict]:
+    """Converts plugin information into rst format
+
+    Returns a dict where key is plugin type, value is a dict
+    of plugin names (str) mapped to a dict of their info
+
+    NOTE: info is currently plugin directory paths and BibTeX citations,
+    but could expand to e.g. include description of plugin
+    """
+    prepared_plugin_info = {}
+    for plugin_type in all_plugin_info:
+        plugin_type_title = plugin_type.capitalize()
+        prepared_plugin_info[plugin_type_title] = {name: {'dirname': k,
+                                                          'citation': (':cite:label:`' + v['bibtex_id'] + '`'
+                                                                       if 'bibtex_id' in v.keys() else None)}
+                                                   for k, v in all_plugin_info[plugin_type].items()
+                                                   for name in v['plugin_names']}
+    return prepared_plugin_info
+
+def _write_to_rst(plugin_info: Dict[str, Dict]):
+    """ Writes plugin info to readthedocs plugins.rst """
+    with open(PLUGINS_DOC_FP, 'w+') as f:
+        doc = RstCloth(f)
+        doc.ref_target(name="plugins")
+        doc.newline()
+        doc.title('Plugins')
+        doc.newline()
+        for plugin_type in plugin_info:
+            doc.h3(plugin_type)
+            for plugin in plugin_info[plugin_type]:
+                doc.h4(plugin)
+                doc.content(plugin_info[plugin_type][plugin]['dirname'])
+                doc.newline()
+                if plugin_info[plugin_type][plugin]['citation']:
+                    doc.content(plugin_info[plugin_type][plugin]['citation'])
+                    doc.newline()
+        doc.h2('Bibliography')
+        doc.directive(name="bibliography", fields=[('all', '')])
+
+def update_readthedocs(all_plugin_info: Dict[str, Dict]):
+    """ For all plugins, add name and info to readthedocs (plugins.rst) """
+    prepared_plugin_info = _prepare_content(all_plugin_info)  # rst formatting
+    _write_to_rst(prepared_plugin_info)
+
+if __name__ == '__main__':
+    all_plugin_info = get_all_plugin_info()
+    for plugin_type in all_plugin_info:
+        create_bibfile(all_plugin_info[plugin_type], plugin_type)  # plugin type .bib file
+    create_bibfile(all_plugin_info)  # one .bib file to rule them all
+    update_readthedocs(all_plugin_info)
diff --git a/docs2/source/examples.md b/docs2/source/examples.md
new file mode 100644
index 00000000..6fa53604
--- /dev/null
+++ b/docs2/source/examples.md
@@ -0,0 +1 @@
+../../examples/README.md
\ No newline at end of file
diff --git a/docs2/source/index.rst b/docs2/source/index.rst
new file mode 100644
index 00000000..87594746
--- /dev/null
+++ b/docs2/source/index.rst
@@ -0,0 +1,28 @@
+Brain-Score Language
+====================
+
+Brain-Score is a collection of benchmarks and models:
+benchmarks combine neural/behavioral data with a metric to score models on their alignment to humans,
+and models are evaluated as computational hypotheses of human brain processing.
+
+The Brain-Score Language library contains
+benchmarks that can easily be used to test language models on their alignment to human behavioral and internal brain
+processing,
+as well as language models that can easily be tested on new behavioral or neural data.
+This makes experimental data accessible to modelers, and computational models accessible to experimenters, +accelerating progress in discovering ever-more-accurate models of the human brain and mind. + +The `score` function is the primary entry point to score a model on a benchmark. + +.. autofunction:: brainscore_language.score + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules/artificial_subject + modules/model_tutorial + modules/benchmark_tutorial + examples + modules/api_reference + modules/plugins diff --git a/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst b/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst new file mode 100644 index 00000000..7b19b2a1 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst @@ -0,0 +1,29 @@ +brainscore\_language.artificial\_subject +======================================== + +.. automodule:: brainscore_language.artificial_subject + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + + ArtificialSubject + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst b/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst new file mode 100644 index 00000000..f4f5eb59 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst @@ -0,0 +1,31 @@ +brainscore\_language.benchmark\_helpers +======================================= + +.. automodule:: brainscore_language.benchmark_helpers + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + _coords_match + ci_error + manual_merge + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst new file mode 100644 index 00000000..192fdd62 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst @@ -0,0 +1,29 @@ +brainscore\_language.model\_helpers.container +============================================= + +.. automodule:: brainscore_language.model_helpers.container + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + + ContainerSubject + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst new file mode 100644 index 00000000..ef6e0d31 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst @@ -0,0 +1,38 @@ +brainscore\_language.model\_helpers.embedding +============================================= + +.. automodule:: brainscore_language.model_helpers.embedding + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + mean_over_words + remove_punctuation + + + + + + .. rubric:: Classes + + .. autosummary:: + + EmbeddingSubject + GensimKeyedVectorsSubject + _GensimLookup + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst new file mode 100644 index 00000000..645b5417 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst @@ -0,0 +1,29 @@ +brainscore\_language.model\_helpers.huggingface +=============================================== + +.. 
automodule:: brainscore_language.model_helpers.huggingface + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + + HuggingfaceSubject + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst new file mode 100644 index 00000000..da22d03a --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst @@ -0,0 +1,29 @@ +brainscore\_language.model\_helpers.preprocessing +================================================= + +.. automodule:: brainscore_language.model_helpers.preprocessing + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + prepare_context + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst new file mode 100644 index 00000000..6b72746e --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst @@ -0,0 +1,34 @@ +brainscore\_language.model\_helpers +=================================== + +.. automodule:: brainscore_language.model_helpers + + + + + + + + + + + + + + + + + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + brainscore_language.model_helpers.container + brainscore_language.model_helpers.embedding + brainscore_language.model_helpers.huggingface + brainscore_language.model_helpers.preprocessing + diff --git a/docs2/source/modules/_autosummary/brainscore_language.rst b/docs2/source/modules/_autosummary/brainscore_language.rst new file mode 100644 index 00000000..e553f190 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.rst @@ -0,0 +1,55 @@ +brainscore\_language +==================== + +.. automodule:: brainscore_language + + + + .. rubric:: Module Attributes + + .. autosummary:: + + data_registry + metric_registry + benchmark_registry + model_registry + + + + + + .. rubric:: Functions + + .. autosummary:: + + _run_score + load_benchmark + load_dataset + load_metric + load_model + score + + + + + + + + + + + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + brainscore_language.artificial_subject + brainscore_language.benchmark_helpers + brainscore_language.model_helpers + brainscore_language.submission + brainscore_language.utils + diff --git a/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst b/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst new file mode 100644 index 00000000..6af546cc --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst @@ -0,0 +1,29 @@ +brainscore\_language.submission.config +====================================== + +.. automodule:: brainscore_language.submission.config + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + get_database_secret + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.submission.rst b/docs2/source/modules/_autosummary/brainscore_language.submission.rst new file mode 100644 index 00000000..3dc7231e --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.submission.rst @@ -0,0 +1,32 @@ +brainscore\_language.submission +=============================== + +.. automodule:: brainscore_language.submission + + + + + + + + + + + + + + + + + + + +.. rubric:: Modules + +.. 
autosummary:: + :toctree: + :recursive: + + brainscore_language.submission.config + brainscore_language.submission.endpoints + diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst new file mode 100644 index 00000000..6983b24d --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst @@ -0,0 +1,29 @@ +brainscore\_language.utils.ceiling +================================== + +.. automodule:: brainscore_language.utils.ceiling + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + ceiling_normalize + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.rst new file mode 100644 index 00000000..deeb61b8 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.utils.rst @@ -0,0 +1,41 @@ +brainscore\_language.utils +========================== + +.. automodule:: brainscore_language.utils + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + attach_presentation_meta + fullname + + + + + + + + + + + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + brainscore_language.utils.ceiling + brainscore_language.utils.s3 + brainscore_language.utils.transformations + brainscore_language.utils.xarray + diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst new file mode 100644 index 00000000..7a9f7169 --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst @@ -0,0 +1,30 @@ +brainscore\_language.utils.s3 +============================= + +.. automodule:: brainscore_language.utils.s3 + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + load_from_s3 + upload_data_assembly + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst new file mode 100644 index 00000000..ad8e05bb --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst @@ -0,0 +1,43 @@ +brainscore\_language.utils.transformations +========================================== + +.. automodule:: brainscore_language.utils.transformations + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + apply_aggregate + enumerate_done + extract_coord + standard_error_of_the_mean + + + + + + .. rubric:: Classes + + .. autosummary:: + + CrossValidation + CrossValidationSingle + Split + TestOnlyCrossValidation + TestOnlyCrossValidationSingle + Transformation + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst new file mode 100644 index 00000000..7725655a --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst @@ -0,0 +1,31 @@ +brainscore\_language.utils.xarray +================================= + +.. automodule:: brainscore_language.utils.xarray + + + + + + + + .. rubric:: Functions + + .. 
autosummary::
+
+      collapse_multidim_coord
+      copy_metadata
+      fix_xr_dtypes
+
+
+
diff --git a/docs2/source/modules/api_reference.rst b/docs2/source/modules/api_reference.rst
new file mode 100644
index 00000000..b87217db
--- /dev/null
+++ b/docs2/source/modules/api_reference.rst
@@ -0,0 +1,10 @@
+.. _api_reference:
+
+API Reference
+-------------
+
+.. autosummary::
+   :toctree: _autosummary
+   :recursive:
+
+   brainscore_language
diff --git a/docs2/source/modules/artificial_subject.rst b/docs2/source/modules/artificial_subject.rst
new file mode 100644
index 00000000..9cb4e82a
--- /dev/null
+++ b/docs2/source/modules/artificial_subject.rst
@@ -0,0 +1,9 @@
+.. _artificial_subject:
+
+*********************************
+ArtificialSubject model interface
+*********************************
+
+.. autoclass:: brainscore_language.artificial_subject.ArtificialSubject
+   :members:
+   :undoc-members:
diff --git a/docs2/source/modules/benchmark_tutorial.rst b/docs2/source/modules/benchmark_tutorial.rst
new file mode 100644
index 00000000..3f7e3946
--- /dev/null
+++ b/docs2/source/modules/benchmark_tutorial.rst
@@ -0,0 +1,364 @@
+.. _new_benchmark_tutorial:
+
+**********************
+New Benchmark Tutorial
+**********************
+
+This example walks through adding a new benchmark and scoring existing models on it.
+Everything can be developed locally with full access to publicly available models,
+but we strongly encourage you to submit your benchmark to Brain-Score to make it accessible to the community,
+and to make it into a goalpost that future models can be measured against.
+
+If you haven't already, check out
+`other benchmarks `_
+and the `docs `_.
+
+A benchmark reproduces the experimental paradigm on a model candidate,
+and tests model predictions against the experimentally observed data,
+using a similarity metric.
+
+In other words, a benchmark consists of three things (each of which is a plugin):
+
+1. experimental paradigm
+2. biological data (neural/behavioral)
+3. similarity metric
+
+For the biological data and the similarity metric, benchmarks can use previously submitted data and metrics.
+That is, recombining existing data and metric plugins into a new benchmark is perfectly valid.
+
+Secondarily, Brain-Score also hosts benchmarks that do not pertain to neural or behavioral data,
+e.g. engineering (ML) benchmarks and other analyses. These benchmarks do not include biological data,
+and the metric might be ground-truth accuracy.
+
+
+1. Package data (optional)
+==========================
+
+You can contribute new data by submitting a data plugin.
+If you are building a benchmark using existing data, you can skip this step.
+
+We use the `BrainIO `_ format to organize data.
+Datasets in BrainIO are called *assemblies* and are based on `xarray `_,
+a multi-dimensional version of pandas, which allows for metadata on numpy arrays of arbitrary dimensionality.
+
+Most assemblies contain a :code:`presentation` dimension for the stimuli that were presented,
+as well as potentially other dimensions for e.g. different subjects or different voxels.
+The actual measurements (e.g. reading times, or voxel activity) are typically the values of an assembly.
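+
+To get a feel for the format, here is a minimal, hypothetical assembly
+(the shape, coordinate names, and values below are made up for illustration):
+
+.. code-block:: python
+
+    import numpy as np
+    from brainio.assemblies import BehavioralAssembly
+
+    # 4 stimuli x 2 subjects of made-up reading times, with metadata attached to each dimension
+    assembly = BehavioralAssembly(np.random.rand(4, 2),
+                                  coords={'stimulus_id': ('presentation', [0, 1, 2, 3]),
+                                          'word': ('presentation', ['the', 'quick', 'brown', 'fox']),
+                                          'subject_id': ('subject', ['A', 'B'])},
+                                  dims=('presentation', 'subject'))
+    print(assembly['word'].values)   # metadata on the presentation dimension
+    print(assembly.mean('subject'))  # aggregate over subjects, as with plain xarray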
+
+Behavioral data
+---------------
+
+The following is an excerpt from the
+`Futrell2018 data packaging `_.
+
+.. code-block:: python
+
+    from brainio.assemblies import BehavioralAssembly
+
+    reading_times = parse_experiment_data(...)  # load the experimental data, e.g. from .csv files
+    # ... obtain as much metadata as we can ...
+
+    assembly = BehavioralAssembly(reading_times, coords={
+        'word': ('presentation', voc_word),
+        'stimulus_id': ('presentation', stimulus_ID),
+        ...
+        'subject_id': ('subject', subjects),
+        'WorkTimeInSeconds': ('subject', WorkTimeInSeconds_meta),
+        ...
+    }, dims=('presentation', 'subject'))
+
+Neural data
+-----------
+
+The following is an excerpt from the
+`Pereira2018 data packaging `_.
+
+.. code-block:: python
+
+    from brainio.assemblies import NeuroidAssembly
+
+    neural_recordings = parse_experiment_data(...)  # load the experimental data, e.g. from .mat files
+    # ... obtain as much metadata as we can ...
+
+    assembly = NeuroidAssembly(neural_recordings, coords={
+        'stimulus': ('presentation', sentences),
+        'stimulus_id': ('presentation', stimulus_id),
+        ...
+        'neuroid_id': ('neuroid', voxel_number),
+        'atlas': ('neuroid', atlases),
+        ...
+    }, dims=['presentation', 'neuroid'])
+
+Register the data plugin
+------------------------
+
+So that your data can be accessed via an identifier, you need to define an endpoint in the plugin registry.
+
+For instance, if your data is on S3, the plugin might look as follows:
+
+.. code-block:: python
+
+    from brainscore_language.utils.s3 import load_from_s3
+
+    def load_assembly() -> BehavioralAssembly:
+        assembly = load_from_s3(
+            identifier="Futrell2018",
+            version_id="MpR.gIXN8UrUnqwQyj.kCrh4VWrBvsGf",
+            sha1="381ccc8038fbdb31235b5f3e1d350f359b5e287f")
+        return assembly
+
+    data_registry['Futrell2018'] = load_assembly
+
+Unit tests
+----------
+
+To ensure the data is in the right format, and not corrupted by any future changes, we require all plugins to include
+an accompanying :code:`test.py` file with unit tests.
+
+For instance, here is a small unit test example validating the dimensions of a reading-times dataset.
+
+.. code-block:: python
+
+    from brainscore_language import load_dataset
+
+    def test_shape():
+        assembly = load_dataset('Futrell2018')
+        assert len(assembly['presentation']) == 10256
+        assert len(assembly['subject']) == 180
+
+These unit tests guarantee the continued validity of your plugin, so we encourage rigorous testing methods.
+
+
+2. Create metric (optional)
+===========================
+
+You can contribute a new metric by submitting a metric plugin.
+If you are building a benchmark using an existing metric, you can skip this step.
+
+Metrics compute the similarity between two measurements.
+These can be model-vs-human, human-vs-human, or model-vs-model.
+Measurements could for instance be reading times, or fMRI recordings.
+
+A simple metric could be the Pearson correlation of two measurements:
+
+.. code-block:: python
+
+    import numpy as np
+    from scipy.stats import pearsonr
+    from brainio.assemblies import DataAssembly
+    from brainscore_core.metrics import Metric, Score
+
+    class PearsonCorrelation(Metric):
+        def __call__(self, assembly1: DataAssembly, assembly2: DataAssembly) -> Score:
+            rvalue, pvalue = pearsonr(assembly1, assembly2)
+            score = Score(np.abs(rvalue))  # similarity score between 0 and 1 indicating alignment of the two assemblies
+            return score
+
+    metric_registry['pearsonr'] = PearsonCorrelation
+
+This example is deliberately simple: it omits e.g. a check that the two assemblies are ordered the same way,
+cross-validation, and bookkeeping of metadata (a sketch of such an ordering check follows at the end of this section).
+
+Unit tests
+----------
+
+As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your metric.
+For instance, the following is an excerpt from the
+`Pearson correlation tests `_.
+
+.. code-block:: python
+
+    from pytest import approx
+    from brainscore_language import load_metric
+
+    def test_weak_correlation():
+        a1 = [1, 2, 3, 4, 5]
+        a2 = [3, 1, 6, 1, 2]
+        metric = load_metric('pearsonr')
+        score = metric(a1, a2)
+        assert score == approx(.152, abs=.005)
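+
+The ordering check mentioned above could look as follows
+(a hypothetical helper for illustration, not part of the library;
+it assumes both assemblies carry a :code:`stimulus_id` coordinate):
+
+.. code-block:: python
+
+    import numpy as np
+
+    def assert_same_order(assembly1, assembly2):
+        # a metric could call this before correlating values
+        ids1 = assembly1['stimulus_id'].values
+        ids2 = assembly2['stimulus_id'].values
+        assert len(ids1) == len(ids2), "assemblies differ in number of presentations"
+        assert np.array_equal(ids1, ids2), "stimulus_id ordering differs between assemblies"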
+
+
+3. Build the benchmark
+======================
+
+With data and metric in place, you can put the two together to build a benchmark that scores model similarity to
+behavioral or neural measurements.
+
+Structure
+---------
+
+A benchmark runs the experiment on a (model) subject candidate in the :code:`__call__` method,
+and compares model predictions against experimental data.
+All interactions with the model are via methods defined in the :doc:`ArtificialSubject <./artificial_subject>` interface
+-- this allows all present and future models to be tested on your benchmark.
+
+For example:
+
+.. code-block:: python
+
+    from brainscore_core.benchmarks import BenchmarkBase
+    from brainscore_core.metrics import Score
+    from brainscore_language import load_dataset, load_metric, ArtificialSubject
+    from brainscore_language.utils.ceiling import ceiling_normalize
+
+    class MyBenchmark(BenchmarkBase):
+        def __init__(self):
+            self.data = load_dataset('mydata')
+            self.metric = load_metric('pearsonr')
+            ...
+
+        def __call__(self, candidate: ArtificialSubject) -> Score:
+            candidate.perform_behavioral_task(ArtificialSubject.Task.reading_times)  # or any other task
+            # or e.g. candidate.perform_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
+            #                                            recording_type=ArtificialSubject.RecordingType.fMRI)
+            stimuli = self.data['stimulus']
+            predictions = candidate.digest_text(stimuli.values)['behavior']
+            raw_score = self.metric(predictions, self.data)
+            score = ceiling_normalize(raw_score, self.ceiling)
+            return score
+
+
+Behavioral benchmark
+--------------------
+
+To test for behavioral alignment, benchmarks compare model outputs to human behavioral measurements.
+The model is instructed to perform a certain task (e.g. output reading times), and then prompted to digest text input,
+for which it will output behavioral predictions.
+
+For instance, here is a sample excerpt from the
+`Futrell2018 benchmark `_
+comparing reading times:
+
+.. code-block:: python
+
+    class Futrell2018Pearsonr(BenchmarkBase):
+        ...
+
+        def __call__(self, candidate: ArtificialSubject) -> Score:
+            candidate.perform_behavioral_task(ArtificialSubject.Task.reading_times)
+            stimuli = self.data['stimulus']
+            predictions = candidate.digest_text(stimuli.values)['behavior']
+            raw_score = self.metric(predictions, self.data)
+            score = ceiling_normalize(raw_score, self.ceiling)
+            return score
+
+    benchmark_registry['Futrell2018-pearsonr'] = Futrell2018Pearsonr
+
+Neural benchmark
+----------------
+
+To test for neural alignment, benchmarks compare model internals to human internal neural activity,
+measured e.g. via fMRI or ECoG.
+Running the experiment on the model subject, the benchmark first instructs where and how to perform neural recording,
+and then prompts the subject with text input, for which the model will output neural predictions.
+
+For instance, here is a sample excerpt from the
+`Pereira2018 linear-predictivity benchmark `_
+linearly comparing fMRI activity:
+
+.. code-block:: python
+
+    class Pereira2018Linear(BenchmarkBase):
+        ...
+
+        def __call__(self, candidate: ArtificialSubject) -> Score:
+            candidate.perform_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
+                                               recording_type=ArtificialSubject.RecordingType.fMRI)
+            stimuli = self.data['stimulus']
+            predictions = candidate.digest_text(stimuli.values)['neural']
+            raw_score = self.metric(predictions, self.data)
+            score = ceiling_normalize(raw_score, self.ceiling)
+            return score
+
+    benchmark_registry['Pereira2018-linear'] = Pereira2018Linear
+
+Ceiling
+-------
+
+You might have noticed that model alignment scores are always relative to a ceiling.
+The ceiling is an estimate of how well the "perfect model" would perform.
+Often, this is an estimate of how well an average human is aligned to the specific data.
+
+For instance, the `Pereira2018 ceiling `_
+compares the linear alignment (i.e. using the same metric) of n-1 subjects to a heldout subject.
+The `Futrell2018 ceiling `_
+compares how well one half of the subjects is aligned to the other half,
+again using the same metric that is used for model comparisons.
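+
+Conceptually, ceiling normalization divides the raw score by the ceiling estimate
+(consistent with the :code:`0.0098731 / .858` expectation in the unit test below).
+The following is a simplified sketch of what
+:code:`brainscore_language.utils.ceiling.ceiling_normalize` does; the packaged helper
+additionally keeps track of the raw score and metadata:
+
+.. code-block:: python
+
+    def ceiling_normalize_sketch(raw_score, ceiling):
+        # a raw score matching the ceiling yields a normalized score of 1
+        return raw_score / ceiling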
+
+Running models on your benchmark
+--------------------------------
+
+You can now locally run models on your benchmark
+(see `4. Submit to Brain-Score`_ for running models on the Brain-Score platform).
+Run the `score function `_,
+passing in the desired model identifier(s) and the identifier for your benchmark.
+
+For instance, you might run:
+
+.. code-block:: python
+
+    from brainscore_language import score
+
+    model_score = score(model_identifier='distilgpt2', benchmark_identifier='benchmarkid-metricid')
+
+Unit tests
+----------
+
+As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your benchmark.
+For instance, the following is an excerpt from the
+`Futrell2018 tests `_:
+
+.. code-block:: python
+
+    import numpy as np
+    from numpy.random import RandomState
+    from pytest import approx
+    from brainio.assemblies import BehavioralAssembly
+    from brainscore_language import ArtificialSubject, load_benchmark
+
+    class DummyModel(ArtificialSubject):
+        def __init__(self, reading_times):
+            self.reading_times = reading_times
+
+        def digest_text(self, stimuli):
+            return {'behavior': BehavioralAssembly(self.reading_times, coords={
+                'context': ('presentation', stimuli),
+                'stimulus_id': ('presentation', np.arange(len(stimuli)))},
+                dims=['presentation'])}
+
+        def perform_behavioral_task(self, task: ArtificialSubject.Task):
+            if task != ArtificialSubject.Task.reading_times:
+                raise NotImplementedError()
+
+    def test_dummy_bad():
+        benchmark = load_benchmark('Futrell2018-pearsonr')
+        reading_times = RandomState(0).random(10256)
+        dummy_model = DummyModel(reading_times=reading_times)
+        score = benchmark(dummy_model)
+        assert score == approx(0.0098731 / .858, abs=0.001)
+
+    def test_ceiling():
+        benchmark = load_benchmark('Futrell2018-pearsonr')
+        ceiling = benchmark.ceiling
+        assert ceiling == approx(.858, abs=.0005)
+        assert ceiling.raw.median('split') == ceiling
+        assert ceiling.uncorrected_consistencies.median('split') < ceiling
+
+4. Submit to Brain-Score
+========================
+
+To share your plugins (data, metrics, and/or benchmarks) with the community
+and to make them accessible for continued model evaluation,
+please submit them to the platform.
+
+There are two main ways to do that:
+
+1. By uploading a zip file on the website
+2. By submitting a github pull request with the proposed changes
+
+Both options result in the same outcome: your plugin will automatically be tested,
+and added to the codebase after it passes tests.
+
+Particulars on data
+-------------------
+
+To make data assemblies accessible for Brain-Score model evaluations, they need to be uploaded.
+You can self-host your data (e.g. on S3/OSF), or contact us to host your data on S3.
+You can also choose to keep your data private such that models can be scored, but the data cannot be accessed.
+
+For uploading data to S3, see the :code:`upload_data_assembly` function
+in `utils/s3 `_.
diff --git a/docs2/source/modules/model_tutorial.rst b/docs2/source/modules/model_tutorial.rst
new file mode 100644
index 00000000..40da77b2
--- /dev/null
+++ b/docs2/source/modules/model_tutorial.rst
@@ -0,0 +1,93 @@
+.. _new_model_tutorial:
+
+******************
+New Model Tutorial
+******************
+
+This example walks through adding a new model and scoring it on existing benchmarks.
+Everything can be developed locally with full access to publicly available benchmarks,
+but we strongly encourage you to submit your model to Brain-Score to make it accessible to the community,
+and to make it testable on future benchmarks.
+
+If you haven't already, check out
+`other models `_
+and the `docs `_.
+
+
+Adding the model plugin
+=======================
+
+We require models to implement the :doc:`ArtificialSubject API <./artificial_subject>`.
+This interface is the central communication point between models and benchmarks
+and guarantees that your model can be evaluated on all available benchmarks.
+It includes three central methods that set the model up for performing a behavioral task,
+for performing neural recordings, and for digesting text with behavioral and/or neural outputs.
+A model does not have to implement all three methods; it can, for instance, engage only on behavior,
+or only on neural recordings.
+
+HuggingFace models
+------------------
+
+For models on HuggingFace, we provide a simple :code:`HuggingfaceSubject` wrapper that lets you instantiate models in
+very few lines of code, since the wrapper takes care of implementing
+the :doc:`ArtificialSubject <./artificial_subject>` interface.
+The main choice you will have to make is which layer corresponds to which brain region.
+For instance, the following is an excerpt from adding
+`gpt models `_:
+
+.. code-block:: python
+
+    from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
+
+    model_registry['distilgpt2'] = lambda: HuggingfaceSubject(model_id='distilgpt2', region_layer_mapping={
+        ArtificialSubject.RecordingTarget.language_system: 'transformer.h.5.mlp.dropout'})
+
+Unit tests
+----------
+
+As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your model.
+For instance, the following is an excerpt from the
+`tests for gpt2-xl `_:
+
+.. code-block:: python
+
+    import numpy as np
+    from brainscore_language import ArtificialSubject, load_model
+
+    def test_next_word():
+        model = load_model('gpt2-xl')
+        text = ['the quick brown fox', 'jumps over', 'the lazy']
+        expected_next_words = ['jumps', 'the', 'dog']
+        model.perform_behavioral_task(task=ArtificialSubject.Task.next_word)
+        next_word_predictions = model.digest_text(text)['behavior']
+        np.testing.assert_array_equal(next_word_predictions, expected_next_words)
+
+
+Running your model on benchmarks
+================================
+
+You can now locally run your model on benchmarks
+(see `Submit to Brain-Score`_ for running models on the Brain-Score platform).
+Run the `score function `_,
+passing in the desired benchmark identifier(s) and the identifier for your model.
+
+For instance, you might run:
+
+.. 
code-block:: python + + from brainscore_language import score + + model_score = score(model_identifier='distilgpt2', benchmark_identifier='Futrell2018-pearsonr') + + +Submit to Brain-Score +===================== + +To share your model plugin with the community and to make it accessible for continued benchmark evaluation, +please submit it to the platform. + +There are two main ways to do that: + +1. By uploading a zip file on the website +2. By submitting a github pull request with the proposed changes + +Both options result in the same outcome: your plugin will automatically be tested, +and added to the codebase after it passes tests. diff --git a/docs2/source/modules/plugins.rst b/docs2/source/modules/plugins.rst new file mode 100644 index 00000000..9be29948 --- /dev/null +++ b/docs2/source/modules/plugins.rst @@ -0,0 +1,106 @@ + +.. _plugins: +======= +Plugins +======= + +Benchmarks +~~~~~~~~~~ +Pereira2018.243sentences-linear ++++++++++++++++++++++++++++++++ +brainscore_language/benchmarks/pereira2018 + +:cite:label:`pereira2018toward` + +Pereira2018.384sentences-linear ++++++++++++++++++++++++++++++++ +brainscore_language/benchmarks/pereira2018 + +:cite:label:`pereira2018toward` + +Futrell2018-pearsonr +++++++++++++++++++++ +brainscore_language/benchmarks/futrell2018 + + +Wikitext-accuracy ++++++++++++++++++ +brainscore_language/benchmarks/wikitext_next_word + +:cite:label:`merity2017pointer` + +Data +~~~~ +Blank2014.fROI +++++++++++++++ +brainscore_language/data/blank2014 + +:cite:label:`blank2014functional` + +Pereira2018.language +++++++++++++++++++++ +brainscore_language/data/pereira2018 + +:cite:label:`pereira2018toward` + +Pereira2018.auditory +++++++++++++++++++++ +brainscore_language/data/pereira2018 + +:cite:label:`pereira2018toward` + +Futrell2018 ++++++++++++ +brainscore_language/data/futrell2018 + +:cite:label:`futrell2018natural` + +Fedorenko2016.language +++++++++++++++++++++++ +brainscore_language/data/fedorenko2016 + +:cite:label:`fedorenko2016neural` + +wikitext-2/test ++++++++++++++++ +brainscore_language/data/wikitext + + +Metrics +~~~~~~~ +pearsonr +++++++++ +brainscore_language/metrics/pearson_correlation + + +linear_pearsonr ++++++++++++++++ +brainscore_language/metrics/linear_predictivity + + +accuracy +++++++++ +brainscore_language/metrics/accuracy + + +Models +~~~~~~ +distilgpt2 +++++++++++ +brainscore_language/models/gpt + + +gpt2-xl ++++++++ +brainscore_language/models/gpt + + +glove-840b +++++++++++ +brainscore_language/models/glove + + +Bibliography +------------ +.. 
bibliography::
+   :all:

From a83eb0090239036b4646f34bb1178c6d22b77251 Mon Sep 17 00:00:00 2001
From: Carol Jiang
Date: Fri, 22 Dec 2023 15:26:17 -0500
Subject: [PATCH 02/12] updated readthedocs

---
 brainscore_language/document_plugins.py       | 196 ++++++
 docs2/document_plugins.py                     | 157 +++++
 docs2/source/_static/overrides.css            |   5 +
 docs2/source/bibtex/data.bib                  |  40 ++
 docs2/source/bibtex/refs.bib                  |  40 ++
 docs2/source/conf.py                          |   5 +-
 .../brainscore_language.document_plugins.rst  |  38 ++
 .../brainscore_language.parser.rst            |  29 +
 .../_autosummary/brainscore_language.rst      |   1 +
 docs2/source/modules/plugins.rst              | 578 ++++++++++++++++--
 10 files changed, 1042 insertions(+), 47 deletions(-)
 create mode 100644 brainscore_language/document_plugins.py
 create mode 100644 docs2/document_plugins.py
 create mode 100644 docs2/source/_static/overrides.css
 create mode 100644 docs2/source/bibtex/data.bib
 create mode 100644 docs2/source/bibtex/refs.bib
 create mode 100644 docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst
 create mode 100644 docs2/source/modules/_autosummary/brainscore_language.parser.rst

diff --git a/brainscore_language/document_plugins.py b/brainscore_language/document_plugins.py
new file mode 100644
index 00000000..3cc13f1b
--- /dev/null
+++ b/brainscore_language/document_plugins.py
@@ -0,0 +1,196 @@
+""" Make plugin details available to readthedocs """
+
+import json
+import logging
+from pathlib import Path
+import re
+from rstcloth import RstCloth
+from typing import Dict, List, Union
+
+from brainscore_language import load_dataset, load_metric
+
+# BIBS_DIR = 'docs/source/bibtex/'
+BIBS_DIR = '../docs2/source/bibtex/'
+GITHUB_DIR = 'https://github.com/brain-score/language/tree/main/brainscore_language/'
+# PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst'
+PLUGINS_DOC_FP = '../docs2/source/modules/plugins.rst'
+PLUGINS_LIST_FP = 'brainscore_language/plugin_management/all_plugins.json'
+PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models']
+PLUGIN_TYPE_MAP = {plugin_dirtype: plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS}
+
+
+def _get_module_plugin_names(plugin_type: str, plugin_dir: Path) -> List[str]:
+    """ Returns list of plugins registered by module """
+    init_fp = plugin_dir / "__init__.py"
+    registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry"
+
+    with open(init_fp, 'r') as f:
+        text = f.read()
+    registered_plugins = re.findall(registry + r'\[(.*)\]', text)
+    cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins]
+
+    return cleaned_plugin_names
+
+def _id_from_bibtex(bibtex: str) -> str:
+    """ Returns BibTeX identifier from BibTeX """
+    return re.search(r'\{(.*?),', bibtex).group(1)
+
+def _data_metric_from_benchmark(filepath):
+    """ Returns the data and metric identifiers loaded by a benchmark.py, if any """
+    with open(filepath, 'r') as f:
+        content = f.read()
+
+    data_match = re.search(r'load_dataset\([\'"]([^\'"]*)[\'"]\)', content)
+    metric_match = re.search(r'load_metric\([\'"]([^\'"]*)[\'"]\)', content)
+
+    data_value = data_match.group(1) if data_match else None
+    metric_value = metric_match.group(1) if metric_match else None
+
+    return data_value, metric_value
+
+def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]:
+    """Add all plugins to respective type registries
+
+    Returns a dict where key is plugin type,
+    value is a dict where key is name of plugin dir,
+    value is a dict of plugin info:
+
+        plugin_names: list of names of all plugins registered by module
+        bibtex: a BibTeX string
+        bibtex_id: BibTeX identifier
+    """
+    all_plugin_info = {}
+    for plugin_type in PLUGIN_DIRS:
+        plugins_dir = Path(Path(__file__).parent, plugin_type)
+        for plugin_dir in plugins_dir.glob('[!._]*'):
+            plugin_dirname = str(plugin_dir)
+
+            if plugin_type not in all_plugin_info:
+                all_plugin_info[plugin_type] = {plugin_dirname: {}}
+            else:
+                all_plugin_info[plugin_type].update({plugin_dirname: {}})
+
+            plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname]
+
+            plugin_names = _get_module_plugin_names(plugin_type, plugin_dir)
+            plugin_module_path = plugin_dirname.replace('/', '.')
+            plugin_module = __import__(plugin_module_path, fromlist=['BIBTEX'])
+
+            plugin_dir_dict['plugin_names'] = plugin_names
+            if hasattr(plugin_module, 'BIBTEX'):
+                plugin_dir_dict['bibtex'] = plugin_module.BIBTEX
+                plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX)
+
+            if plugin_type == 'benchmarks':
+                data_value, metric_value = _data_metric_from_benchmark(plugin_dirname + '/benchmark.py')
+                if data_value:
+                    plugin_dir_dict['data_value'] = data_value
+                if metric_value:
+                    plugin_dir_dict['metric_value'] = metric_value
+
+    return all_plugin_info
+
+def _remove_duplicate_bibs(plugins_with_bibtex: Dict[str, Dict]) -> List[str]:
+    """ Returns list of unique BibTeX to add """
+    bibtex_data = {v['bibtex_id']: v['bibtex'] for v in plugins_with_bibtex.values()}
+    alphabetized_bibtex = dict(sorted(bibtex_data.items()))
+    deduped_bibtex = list(alphabetized_bibtex.values())
+
+    return deduped_bibtex
+
+def _record_bibtex(bibtex_to_add: List[str], plugins_bib_fp: str):
+    """ insert new BibTeX into respective .bib files """
+    if not Path(BIBS_DIR).exists():
+        Path(BIBS_DIR).mkdir(parents=True)
+    with open(plugins_bib_fp, "w+") as f:
+        for bibtex in bibtex_to_add:
+            f.write(bibtex)
+            f.write('\n')
+
+def create_bibfile(plugins: Dict[str, Dict], plugin_type='refs'):
+    """ For all plugins, add bibtex (if present) to .bib files """
+    if plugin_type == 'refs':
+        plugins = dict(ele for sub in plugins.values() for ele in sub.items())
+    # drop plugins without bibtex
+    plugins_with_bibtex = {k: v for k, v in plugins.items() if 'bibtex' in v.keys()}
+    if len(plugins_with_bibtex.keys()) > 0:
+        plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib')
+        bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex)
+        _record_bibtex(bibtex_to_add, plugins_bib_fp)
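+
+# Example (hypothetical run, for illustration): with the registries above,
+#     info = get_all_plugin_info()
+#     create_bibfile(info['data'], 'data')   # writes BIBS_DIR + 'data.bib'
+#     create_bibfile(info)                   # writes the combined BIBS_DIR + 'refs.bib'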
+
+def _prepare_content(all_plugin_info: Dict[str, Dict]) -> Dict[str, Dict]:
+    """Converts plugin information into rst format
+
+    Returns a dict where key is plugin type, value is a dict
+    of plugin names (str) mapped to a dict of their info
+
+    NOTE: info is currently plugin directory paths and BibTeX citations,
+    but could expand to e.g. include description of plugin
+    """
+    prepared_plugin_info = {}
+    for plugin_type in all_plugin_info:
+        plugin_type_title = plugin_type.capitalize()
+        prepared_plugin_info[plugin_type_title] = {name: {'dirname': k,
+                                                          'citation': (':cite:label:`' + v['bibtex_id'] + '`'
+                                                                       if 'bibtex_id' in v.keys() else None),
+                                                          'data': (f":ref:`{v['data_value']} <{v['data_value']}>`"
+                                                                   if 'data_value' in v.keys() else None),
+                                                          'metric': (f":ref:`{v['metric_value']} <{v['metric_value']}>`"
+                                                                     if 'metric_value' in v.keys() else None)}
+                                                   for k, v in all_plugin_info[plugin_type].items()
+                                                   for name in v['plugin_names']}
+    return prepared_plugin_info
+
+def _write_to_rst(plugin_info: Dict[str, Dict]):
+    """ Writes plugin info to readthedocs plugins.rst """
+    upper_levels = set()
+    with open(PLUGINS_DOC_FP, 'w+') as f:
+        doc = RstCloth(f)
+        doc.ref_target(name="plugins")
+        doc.newline()
+        doc.title('Plugins')
+        doc.newline()
+        for plugin_type in plugin_info:
+            doc.newline()
+            doc.h3(plugin_type)
+            doc.content(f'The following {plugin_type} are available. Plugin identifiers are grouped by directory, shown in bold.')
+            doc.newline()
+            for plugin in plugin_info[plugin_type]:
+                location = plugin_info[plugin_type][plugin]['dirname']
+                location_link = f'`{location} <{GITHUB_DIR}{location}>`_'
+                if location not in upper_levels:
+                    idx = location.index('/')
+                    doc.h4(location[idx + 1:].capitalize())
+                    doc.content(f'Location: {location_link}')
+                    doc.newline()
+                    upper_levels.add(location)
+                doc.newline()
+                doc.ref_target(plugin)
+                doc.h6(plugin)
+                doc.newline()
+                if plugin_info[plugin_type][plugin]['data']:
+                    doc.content(f"Data: {plugin_info[plugin_type][plugin]['data']}", indent=2)
+                    doc.newline()
+                if plugin_info[plugin_type][plugin]['metric']:
+                    doc.content(f"Metric: {plugin_info[plugin_type][plugin]['metric']}", indent=2)
+                    doc.newline()
+                if plugin_info[plugin_type][plugin]['citation']:
+                    doc.content(f"Citation: {plugin_info[plugin_type][plugin]['citation']}", indent=2)
+                    doc.newline()
+            doc.newline()
+        doc.h2('Bibliography')
+        doc.directive(name="bibliography", fields=[('all', '')])
+
+def update_readthedocs(all_plugin_info: Dict[str, Dict]):
+    """ For all plugins, add name and info to readthedocs (plugins.rst) """
+    prepared_plugin_info = _prepare_content(all_plugin_info)  # rst formatting
+    _write_to_rst(prepared_plugin_info)
+
+if __name__ == '__main__':
+    all_plugin_info = get_all_plugin_info()
+    for plugin_type in all_plugin_info:
+        create_bibfile(all_plugin_info[plugin_type], plugin_type)  # plugin type .bib file
+    create_bibfile(all_plugin_info)  # one .bib file to rule them all
+    update_readthedocs(all_plugin_info)
diff --git a/docs2/document_plugins.py b/docs2/document_plugins.py
new file mode 100644
index 00000000..795e50e5
--- /dev/null
+++ b/docs2/document_plugins.py
@@ -0,0 +1,157 @@
+""" Make plugin details available to readthedocs """
+
+import json
+import logging
+from pathlib import Path
+import re
+from rstcloth import RstCloth
+from typing import Dict, List, Union
+
+# BIBS_DIR = 'docs/source/bibtex/'
+BIBS_DIR = 'source/bibtex/'
+# PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst'
+PLUGINS_DOC_FP = 'source/modules/plugins2.rst'
+PLUGINS_LIST_FP = '../brainscore_language/plugin_management/all_plugins.json'
+PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models']
+PLUGIN_TYPE_MAP = {plugin_dirtype: plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS}
+
+
+def _get_module_plugin_names(plugin_type: str, plugin_dir: Path) -> List[str]:
+    """ Returns list of plugins registered by module """
+    init_fp = plugin_dir / "__init__.py"
+    registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry"
+
+    with open(init_fp, 'r') as f:
+        text = f.read()
+    registered_plugins = re.findall(registry + r'\[(.*)\]', text)
+    cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins]
+
+    return cleaned_plugin_names
+
+def _id_from_bibtex(bibtex: str) -> str:
+    """ Returns BibTeX identifier from BibTeX """
+    return re.search(r'\{(.*?),', bibtex).group(1)
+
+def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]:
+    """Add all plugins to respective type registries
+
+    Returns a dict where key is plugin type,
+    value is a dict where key is name of plugin dir,
+    value is a dict of plugin info:
+
+        plugin_names: list of names of all plugins registered by module
+        bibtex: a BibTeX string
+        bibtex_id: BibTeX identifier
+    """
+    all_plugin_info = {}
+    for plugin_type in PLUGIN_DIRS:
+        plugins_dir = Path(Path(__file__).resolve().parents[1], 'brainscore_language', plugin_type)
+        for plugin_dir in plugins_dir.glob('[!._]*'):
+            # trim the absolute path down to the importable package path
+            idx = str(plugin_dir).index('brainscore_language')
+            plugin_dirname = str(plugin_dir)[idx:]
+
+            if plugin_type not in all_plugin_info:
+                all_plugin_info[plugin_type] = {plugin_dirname: {}}
+            else:
+                all_plugin_info[plugin_type].update({plugin_dirname: {}})
+
+            plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname]
+
+            plugin_names = _get_module_plugin_names(plugin_type, plugin_dir)
+            plugin_module_path = plugin_dirname.replace('/', '.')
+            plugin_module = __import__(plugin_module_path, fromlist=['BIBTEX'])  # import by dotted module path
+
+            plugin_dir_dict['plugin_names'] = plugin_names
+            if hasattr(plugin_module, 'BIBTEX'):
+                plugin_dir_dict['bibtex'] = plugin_module.BIBTEX
+                plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX)
+
+    return all_plugin_info
+
+def _remove_duplicate_bibs(plugins_with_bibtex: Dict[str, Dict]) -> List[str]:
+    """ Returns list of unique BibTeX to add """
+    bibtex_data = {v['bibtex_id']: v['bibtex'] for v in plugins_with_bibtex.values()}
+    alphabetized_bibtex = dict(sorted(bibtex_data.items()))
+    deduped_bibtex = list(alphabetized_bibtex.values())
+
+    return deduped_bibtex
+
+def _record_bibtex(bibtex_to_add: List[str], plugins_bib_fp: str):
+    """ insert new BibTeX into respective .bib files """
+    if not Path(BIBS_DIR).exists():
+        Path(BIBS_DIR).mkdir(parents=True)
+    with open(plugins_bib_fp, "w+") as f:
+        for bibtex in bibtex_to_add:
+            f.write(bibtex)
+            f.write('\n')
+
+def create_bibfile(plugins: Dict[str, Dict], plugin_type='refs'):
+    """ For all plugins, add bibtex (if present) to .bib files """
+    if plugin_type == 'refs':
+        plugins = dict(ele for sub in plugins.values() for ele in sub.items())
+    # drop plugins without bibtex
+    plugins_with_bibtex = {k: v for k, v in plugins.items() if 'bibtex' in v.keys()}
+    if len(plugins_with_bibtex.keys()) > 0:
+        plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib')
+        bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex)
+        _record_bibtex(bibtex_to_add, plugins_bib_fp)
+
+def _prepare_content(all_plugin_info: Dict[str, Dict]) -> Dict[str, Dict]:
+    """Converts plugin information into rst format
+
+    Returns a dict where key is plugin type, value is a dict
+    of plugin names
+def _prepare_content(all_plugin_info:Dict[str, Dict]) -> Dict[str, Dict]:
+    """Converts plugin information into rst format
+
+    Returns a dict where key is plugin type, value is a dict
+    of plugin names (str) mapped to a dict of their info
+
+    NOTE: info is currently plugin directory paths and BibTeX citations,
+    but could expand to e.g. include description of plugin
+    """
+    prepared_plugin_info = {}
+    for plugin_type in all_plugin_info:
+        plugin_type_title = plugin_type.capitalize()
+        prepared_plugin_info[plugin_type_title] = {name:{'dirname':k,
+                                                         'citation':(':cite:label:`' + v['bibtex_id'] +'`'
+                                                                     if 'bibtex_id' in v.keys() else None)}
+                                                   for k,v in all_plugin_info[plugin_type].items()
+                                                   for name in v['plugin_names']}
+    return prepared_plugin_info
+
+def _write_to_rst(plugin_info:Dict[str,Dict]):
+    """ Writes plugin info to readthedocs plugins.rst """
+    print(PLUGINS_DOC_FP)
+    with open(PLUGINS_DOC_FP, 'w+') as f:
+        doc = RstCloth(f)
+        doc.ref_target(name="plugins")
+        doc.newline()
+        doc.title('Plugins')
+        doc.newline()
+        for plugin_type in plugin_info:
+            doc.h3(plugin_type)
+            for plugin in plugin_info[plugin_type]:
+                doc.h4(plugin)
+                doc.content(plugin_info[plugin_type][plugin]['dirname'])
+                doc.newline()
+                if plugin_info[plugin_type][plugin]['citation']:
+                    doc.content(plugin_info[plugin_type][plugin]['citation'])
+                    doc.newline()
+        doc.h2('Bibliography')
+        doc.directive(name="bibliography", fields=[('all','')])
+
+def update_readthedocs(all_plugin_info:Dict[str,Dict]):
+    """ For all plugins, add name and info to readthedocs (plugins.rst) """
+    prepared_plugin_info = _prepare_content(all_plugin_info) # rst formatting
+    _write_to_rst(prepared_plugin_info)
+
+if __name__ == '__main__':
+    all_plugin_info = get_all_plugin_info()
+    for plugin_type in all_plugin_info:
+        create_bibfile(all_plugin_info[plugin_type], plugin_type) # plugin type .bib file
+    create_bibfile(all_plugin_info) # one .bib file to rule them all
+    update_readthedocs(all_plugin_info)
diff --git a/docs2/source/_static/overrides.css b/docs2/source/_static/overrides.css
new file mode 100644
index 00000000..b67302d2
--- /dev/null
+++ b/docs2/source/_static/overrides.css
@@ -0,0 +1,5 @@
+@import url("sphinx_rtd_theme.css");
+
+h6 {
+    font-size: 5px;
+}
\ No newline at end of file
diff --git a/docs2/source/bibtex/data.bib b/docs2/source/bibtex/data.bib
new file mode 100644
index 00000000..589d0bf9
--- /dev/null
+++ b/docs2/source/bibtex/data.bib
@@ -0,0 +1,40 @@
+@article{blank2014functional,
+    title={A functional dissociation between language and multiple-demand systems revealed in patterns of BOLD signal fluctuations},
+    author={Blank, Idan and Kanwisher, Nancy and Fedorenko, Evelina},
+    journal={Journal of neurophysiology},
+    volume={112},
+    number={5},
+    pages={1105--1118},
+    year={2014},
+    publisher={American Physiological Society Bethesda, MD}
+}
+@article{fedorenko2016neural,
+    title={Neural correlate of the construction of sentence meaning},
+    author={Fedorenko, Evelina and Scott, Terri L and Brunner, Peter and Coon, William G and Pritchett, Brianna and
+            Schalk, Gerwin and Kanwisher, Nancy},
+    journal={Proceedings of the National Academy of Sciences},
+    volume={113},
+    number={41},
+    pages={E6256--E6262},
+    year={2016},
+    publisher={National Acad Sciences}
+}
+@proceedings{futrell2018natural,
+    title={The Natural Stories Corpus},
+    author={Futrell, Richard and Gibson, Edward and Tily, Harry J. and Blank, Idan and Vishnevetsky, Anastasia and
+            Piantadosi, Steven T.
and Fedorenko, Evelina}, + conference={International Conference on Language Resources and Evaluation (LREC)}, + url={http://www.lrec-conf.org/proceedings/lrec2018/pdf/337.pdf}, + year={2018} +} +@article{pereira2018toward, + title={Toward a universal decoder of linguistic meaning from brain activation}, + author={Pereira, Francisco and Lou, Bin and Pritchett, Brianna and Ritter, Samuel and Gershman, Samuel J + and Kanwisher, Nancy and Botvinick, Matthew and Fedorenko, Evelina}, + journal={Nature communications}, + volume={9}, + number={1}, + pages={1--13}, + year={2018}, + publisher={Nature Publishing Group} +} diff --git a/docs2/source/bibtex/refs.bib b/docs2/source/bibtex/refs.bib new file mode 100644 index 00000000..589d0bf9 --- /dev/null +++ b/docs2/source/bibtex/refs.bib @@ -0,0 +1,40 @@ +@article{blank2014functional, + title={A functional dissociation between language and multiple-demand systems revealed in patterns of BOLD signal fluctuations}, + author={Blank, Idan and Kanwisher, Nancy and Fedorenko, Evelina}, + journal={Journal of neurophysiology}, + volume={112}, + number={5}, + pages={1105--1118}, + year={2014}, + publisher={American Physiological Society Bethesda, MD} +} +@article{fedorenko2016neural, + title={Neural correlate of the construction of sentence meaning}, + author={Fedorenko, Evelina and Scott, Terri L and Brunner, Peter and Coon, William G and Pritchett, Brianna and + Schalk, Gerwin and Kanwisher, Nancy}, + journal={Proceedings of the National Academy of Sciences}, + volume={113}, + number={41}, + pages={E6256--E6262}, + year={2016}, + publisher={National Acad Sciences} +} +@proceedings{futrell2018natural, + title={The Natural Stories Corpus}, + author={Futrell, Richard and Gibson, Edward and Tily, Harry J. and Blank, Idan and Vishnevetsky, Anastasia and + Piantadosi, Steven T. and Fedorenko, Evelina}, + conference={International Conference on Language Resources and Evaluation (LREC)}, + url={http://www.lrec-conf.org/proceedings/lrec2018/pdf/337.pdf}, + year={2018} +} +@article{pereira2018toward, + title={Toward a universal decoder of linguistic meaning from brain activation}, + author={Pereira, Francisco and Lou, Bin and Pritchett, Brianna and Ritter, Samuel and Gershman, Samuel J + and Kanwisher, Nancy and Botvinick, Matthew and Fedorenko, Evelina}, + journal={Nature communications}, + volume={9}, + number={1}, + pages={1--13}, + year={2018}, + publisher={Nature Publishing Group} +} diff --git a/docs2/source/conf.py b/docs2/source/conf.py index f6e384e5..ddc40a5b 100644 --- a/docs2/source/conf.py +++ b/docs2/source/conf.py @@ -35,7 +35,9 @@ 'recommonmark', 'sphinx.ext.viewcode', 'sphinx_rtd_theme', - 'sphinxcontrib.bibtex' + 'sphinxcontrib.bibtex', + 'rstcloth', + 'sphinx.ext.autosectionlabel' ] autosummary_generate = True # Turn on sphinx.ext.autosummary @@ -64,6 +66,7 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" +html_style = "overrides.css" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst b/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst new file mode 100644 index 00000000..4311171c --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst @@ -0,0 +1,38 @@ +brainscore\_language.document\_plugins +====================================== + +.. 
automodule:: brainscore_language.document_plugins + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + _data_metric_from_benchmark + _get_module_plugin_names + _id_from_bibtex + _prepare_content + _record_bibtex + _remove_duplicate_bibs + _write_to_rst + create_bibfile + get_all_plugin_info + update_readthedocs + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.parser.rst b/docs2/source/modules/_autosummary/brainscore_language.parser.rst new file mode 100644 index 00000000..5f95208e --- /dev/null +++ b/docs2/source/modules/_autosummary/brainscore_language.parser.rst @@ -0,0 +1,29 @@ +brainscore\_language.parser +=========================== + +.. automodule:: brainscore_language.parser + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + parse + + + + + + + + + + + + + diff --git a/docs2/source/modules/_autosummary/brainscore_language.rst b/docs2/source/modules/_autosummary/brainscore_language.rst index e553f190..383aba0e 100644 --- a/docs2/source/modules/_autosummary/brainscore_language.rst +++ b/docs2/source/modules/_autosummary/brainscore_language.rst @@ -49,6 +49,7 @@ brainscore_language.artificial_subject brainscore_language.benchmark_helpers + brainscore_language.document_plugins brainscore_language.model_helpers brainscore_language.submission brainscore_language.utils diff --git a/docs2/source/modules/plugins.rst b/docs2/source/modules/plugins.rst index 9be29948..b6a6bb8a 100644 --- a/docs2/source/modules/plugins.rst +++ b/docs2/source/modules/plugins.rst @@ -1,103 +1,589 @@ - .. _plugins: + ======= Plugins ======= + Benchmarks ~~~~~~~~~~ +The following Benchmarks are available. Plugin identifiers are grouped +by directory, shown in bold. + +Futrell2018 ++++++++++++ +Location: `benchmarks/futrell2018 +`_ + + +.. _Futrell2018-pearsonr: +Futrell2018-pearsonr +;;;;;;;;;;;;;;;;;;;; + + Data: :ref:`Futrell2018 ` + + Metric: :ref:`pearsonr ` + + +Pereira2018 ++++++++++++ +Location: `benchmarks/pereira2018 +`_ + + +.. _Pereira2018.243sentences-linear: Pereira2018.243sentences-linear -+++++++++++++++++++++++++++++++ -brainscore_language/benchmarks/pereira2018 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -:cite:label:`pereira2018toward` + Data: :ref:`Pereira2018.language ` + Metric: :ref:`linear_pearsonr ` + + + +.. _Pereira2018.384sentences-linear: Pereira2018.384sentences-linear -+++++++++++++++++++++++++++++++ -brainscore_language/benchmarks/pereira2018 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -:cite:label:`pereira2018toward` + Data: :ref:`Pereira2018.language ` -Futrell2018-pearsonr -++++++++++++++++++++ -brainscore_language/benchmarks/futrell2018 + Metric: :ref:`linear_pearsonr ` +Wikitext_next_word +++++++++++++++++++ +Location: `benchmarks/wikitext_next_word +`_ + + +.. _Wikitext-accuracy: Wikitext-accuracy -+++++++++++++++++ -brainscore_language/benchmarks/wikitext_next_word +;;;;;;;;;;;;;;;;; + + Data: :ref:`wikitext-2/test ` + + Metric: :ref:`accuracy ` + + +Blank2014 ++++++++++ +Location: `benchmarks/blank2014 +`_ + + +.. _Blank2014-linear: +Blank2014-linear +;;;;;;;;;;;;;;;; + + Data: :ref:`Blank2014.fROI ` + + Metric: :ref:`linear_pearsonr ` + + +Syntaxgym ++++++++++ +Location: `benchmarks/syntaxgym +`_ + + +.. _syntaxgym-center_embed: +syntaxgym-center_embed +;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-center_embed_mod: +syntaxgym-center_embed_mod +;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-cleft: +syntaxgym-cleft +;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. 
_syntaxgym-cleft_modifier: +syntaxgym-cleft_modifier +;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-fgd_hierarchy: +syntaxgym-fgd_hierarchy +;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-fgd_object: +syntaxgym-fgd_object +;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-fgd_pp: +syntaxgym-fgd_pp +;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-fgd_subject: +syntaxgym-fgd_subject +;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-mvrr: +syntaxgym-mvrr +;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-mvrr_mod: +syntaxgym-mvrr_mod +;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npi_orc_any: +syntaxgym-npi_orc_any +;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npi_orc_ever: +syntaxgym-npi_orc_ever +;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npi_src_any: +syntaxgym-npi_src_any +;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npi_src_ever: +syntaxgym-npi_src_ever +;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npz_ambig: +syntaxgym-npz_ambig +;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npz_ambig_mod: +syntaxgym-npz_ambig_mod +;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npz_obj: +syntaxgym-npz_obj +;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-npz_obj_mod: +syntaxgym-npz_obj_mod +;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-number_orc: +syntaxgym-number_orc +;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-number_prep: +syntaxgym-number_prep +;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-number_src: +syntaxgym-number_src +;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_orc_fem: +syntaxgym-reflexive_orc_fem +;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_orc_masc: +syntaxgym-reflexive_orc_masc +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_prep_fem: +syntaxgym-reflexive_prep_fem +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_prep_masc: +syntaxgym-reflexive_prep_masc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_src_fem: +syntaxgym-reflexive_src_fem +;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-reflexive_src_masc: +syntaxgym-reflexive_src_masc +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-subordination: +syntaxgym-subordination +;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-subordination_orc-orc: +syntaxgym-subordination_orc-orc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-subordination_pp-pp: +syntaxgym-subordination_pp-pp +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + + + +.. _syntaxgym-subordination_src-src: +syntaxgym-subordination_src-src +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + Metric: :ref:`accuracy ` + -:cite:label:`merity2017pointer` Data ~~~~ +The following Data are available. Plugin identifiers are grouped by +directory, shown in bold. + +Blank2014 ++++++++++ +Location: `data/blank2014 +`_ + + +.. 
_Blank2014.fROI: Blank2014.fROI -++++++++++++++ -brainscore_language/data/blank2014 +;;;;;;;;;;;;;; -:cite:label:`blank2014functional` + Citation: :cite:label:`blank2014functional` -Pereira2018.language -++++++++++++++++++++ -brainscore_language/data/pereira2018 -:cite:label:`pereira2018toward` +Fedorenko2016 ++++++++++++++ +Location: `data/fedorenko2016 +`_ -Pereira2018.auditory -++++++++++++++++++++ -brainscore_language/data/pereira2018 -:cite:label:`pereira2018toward` +.. _Fedorenko2016.language: +Fedorenko2016.language +;;;;;;;;;;;;;;;;;;;;;; + + Citation: :cite:label:`fedorenko2016neural` + Futrell2018 +++++++++++ -brainscore_language/data/futrell2018 +Location: `data/futrell2018 +`_ -:cite:label:`futrell2018natural` -Fedorenko2016.language -++++++++++++++++++++++ -brainscore_language/data/fedorenko2016 +.. _Futrell2018: +Futrell2018 +;;;;;;;;;;; + + Citation: :cite:label:`futrell2018natural` -:cite:label:`fedorenko2016neural` +Pereira2018 ++++++++++++ +Location: `data/pereira2018 +`_ + + +.. _Pereira2018.language: +Pereira2018.language +;;;;;;;;;;;;;;;;;;;; + + Citation: :cite:label:`pereira2018toward` + + + +.. _Pereira2018.auditory: +Pereira2018.auditory +;;;;;;;;;;;;;;;;;;;; + + Citation: :cite:label:`pereira2018toward` + + +Wikitext +++++++++ +Location: `data/wikitext +`_ + + +.. _wikitext-2/test: wikitext-2/test -+++++++++++++++ -brainscore_language/data/wikitext +;;;;;;;;;;;;;;; + Metrics ~~~~~~~ -pearsonr +The following Metrics are available. Plugin identifiers are grouped by +directory, shown in bold. + +Accuracy ++++++++ -brainscore_language/metrics/pearson_correlation +Location: `metrics/accuracy +`_ + + +.. _accuracy: +accuracy +;;;;;;;; + + +Linear_predictivity ++++++++++++++++++++ +Location: `metrics/linear_predictivity +`_ +.. _linear_pearsonr: linear_pearsonr -+++++++++++++++ -brainscore_language/metrics/linear_predictivity +;;;;;;;;;;;;;;; -accuracy -++++++++ -brainscore_language/metrics/accuracy +Pearson_correlation ++++++++++++++++++++ +Location: `metrics/pearson_correlation +`_ + + +.. _pearsonr: +pearsonr +;;;;;;;; + Models ~~~~~~ +The following Models are available. Plugin identifiers are grouped by +directory, shown in bold. + +Glove ++++++ +Location: `models/glove +`_ + + +.. _glove-840b: +glove-840b +;;;;;;;;;; + + +Gpt ++++ +Location: `models/gpt +`_ + + +.. _distilgpt2: distilgpt2 -++++++++++ -brainscore_language/models/gpt +;;;;;;;;;; + +.. _gpt2-xl: gpt2-xl -+++++++ -brainscore_language/models/gpt +;;;;;;; -glove-840b -++++++++++ -brainscore_language/models/glove + +.. _gpt-neo-2.7B: +gpt-neo-2.7B +;;;;;;;;;;;; + + + +.. _gpt-neo-1.3B: +gpt-neo-1.3B +;;;;;;;;;;;; + + +Earley_parser ++++++++++++++ +Location: `models/earley_parser +`_ + + +.. _earley-parser-minivocab: +earley-parser-minivocab +;;;;;;;;;;;;;;;;;;;;;;; + + +Lm1b +++++ +Location: `models/lm1b +`_ + + +.. _lm1b: +lm1b +;;;; + + +Random_embedding +++++++++++++++++ +Location: `models/random_embedding +`_ + + +.. _randomembedding-1600: +randomembedding-1600 +;;;;;;;;;;;;;;;;;;;; + + + +.. _randomembedding-100: +randomembedding-100 +;;;;;;;;;;;;;;;;;;; + + +Rnng +++++ +Location: `models/rnng +`_ + + +.. _rnn-slm-ptb: +rnn-slm-ptb +;;;;;;;;;;; + + + +.. _rnn-tdg-ptb: +rnn-tdg-ptb +;;;;;;;;;;; + + + +.. _rnn-slm-ptboanc: +rnn-slm-ptboanc +;;;;;;;;;;;;;;; + + + +.. _rnn-tdg-ptboanc: +rnn-tdg-ptboanc +;;;;;;;;;;;;;;; + + + +.. _rnn-slm-ptboanc-1024: +rnn-slm-ptboanc-1024 +;;;;;;;;;;;;;;;;;;;; + + + +.. 
_rnn-tdg-ptboanc-1024: +rnn-tdg-ptboanc-1024 +;;;;;;;;;;;;;;;;;;;; + + +Rnnlm ++++++ +Location: `models/rnnlm +`_ + + +.. _rnn-lm-ptb: +rnn-lm-ptb +;;;;;;;;;; Bibliography From ab75da583bd1a347e800b183a3927513f1f52c70 Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 22 Jan 2024 17:14:41 -0500 Subject: [PATCH 03/12] merging diverged docs and docs2 directories --- docs/Makefile | 6 - docs/requirements.txt | 3 +- {docs2 => docs}/source/_static/overrides.css | 0 {docs2 => docs}/source/bibtex/data.bib | 0 {docs2 => docs}/source/bibtex/refs.bib | 0 docs/source/conf.py | 7 + docs/source/index.rst | 1 + docs/source/modules/benchmark_tutorial.rst | 2 +- {docs2 => docs}/source/modules/plugins.rst | 0 docs2/Makefile | 20 - docs2/document_plugins.py | 157 -------- docs2/requirements.txt | 5 - docs2/source/conf.py | 86 ----- docs2/source/document_plugins.py | 146 ------- docs2/source/examples.md | 1 - docs2/source/index.rst | 28 -- ...brainscore_language.artificial_subject.rst | 29 -- .../brainscore_language.benchmark_helpers.rst | 31 -- .../brainscore_language.document_plugins.rst | 38 -- ...score_language.model_helpers.container.rst | 29 -- ...score_language.model_helpers.embedding.rst | 38 -- ...ore_language.model_helpers.huggingface.rst | 29 -- ...e_language.model_helpers.preprocessing.rst | 29 -- .../brainscore_language.model_helpers.rst | 34 -- .../brainscore_language.parser.rst | 29 -- .../_autosummary/brainscore_language.rst | 56 --- .../brainscore_language.submission.config.rst | 29 -- .../brainscore_language.submission.rst | 32 -- .../brainscore_language.utils.ceiling.rst | 29 -- .../brainscore_language.utils.rst | 41 -- .../brainscore_language.utils.s3.rst | 30 -- ...inscore_language.utils.transformations.rst | 43 --- .../brainscore_language.utils.xarray.rst | 31 -- docs2/source/modules/api_reference.rst | 10 - docs2/source/modules/artificial_subject.rst | 9 - docs2/source/modules/benchmark_tutorial.rst | 364 ------------------ docs2/source/modules/model_tutorial.rst | 93 ----- 37 files changed, 11 insertions(+), 1504 deletions(-) rename {docs2 => docs}/source/_static/overrides.css (100%) rename {docs2 => docs}/source/bibtex/data.bib (100%) rename {docs2 => docs}/source/bibtex/refs.bib (100%) rename {docs2 => docs}/source/modules/plugins.rst (100%) delete mode 100644 docs2/Makefile delete mode 100644 docs2/document_plugins.py delete mode 100644 docs2/requirements.txt delete mode 100644 docs2/source/conf.py delete mode 100644 docs2/source/document_plugins.py delete mode 100644 docs2/source/examples.md delete mode 100644 docs2/source/index.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.parser.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.rst delete mode 
100644 docs2/source/modules/_autosummary/brainscore_language.submission.config.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.submission.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst delete mode 100644 docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst delete mode 100644 docs2/source/modules/api_reference.rst delete mode 100644 docs2/source/modules/artificial_subject.rst delete mode 100644 docs2/source/modules/benchmark_tutorial.rst delete mode 100644 docs2/source/modules/model_tutorial.rst diff --git a/docs/Makefile b/docs/Makefile index 92dd33a1..94265032 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,20 +1,14 @@ # Minimal makefile for Sphinx documentation -# -# You can set these variables from the command line, and also -# from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = _build -# Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/requirements.txt b/docs/requirements.txt index 23532fd2..ab14128c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,5 @@ Sphinx>=4 sphinx_rtd_theme -recommonmark \ No newline at end of file +recommonmark +# sphinxcontrib-bibtex \ No newline at end of file diff --git a/docs2/source/_static/overrides.css b/docs/source/_static/overrides.css similarity index 100% rename from docs2/source/_static/overrides.css rename to docs/source/_static/overrides.css diff --git a/docs2/source/bibtex/data.bib b/docs/source/bibtex/data.bib similarity index 100% rename from docs2/source/bibtex/data.bib rename to docs/source/bibtex/data.bib diff --git a/docs2/source/bibtex/refs.bib b/docs/source/bibtex/refs.bib similarity index 100% rename from docs2/source/bibtex/refs.bib rename to docs/source/bibtex/refs.bib diff --git a/docs/source/conf.py b/docs/source/conf.py index 61b44176..ddc40a5b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -35,6 +35,9 @@ 'recommonmark', 'sphinx.ext.viewcode', 'sphinx_rtd_theme', + 'sphinxcontrib.bibtex', + 'rstcloth', + 'sphinx.ext.autosectionlabel' ] autosummary_generate = True # Turn on sphinx.ext.autosummary @@ -53,6 +56,9 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] +# allows BibTeX citations to be inserted into docs +bibtex_bibfiles = ['bibtex/refs.bib'] + # -- Options for HTML output ------------------------------------------------- @@ -60,6 +66,7 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" +html_style = "overrides.css" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, diff --git a/docs/source/index.rst b/docs/source/index.rst index c137d24f..5675b826 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -25,4 +25,5 @@ The `score` function is the primary entry point to score a model on a benchmark. modules/benchmark_tutorial examples modules/api_reference + modules/plugins Glossary diff --git a/docs/source/modules/benchmark_tutorial.rst b/docs/source/modules/benchmark_tutorial.rst index b5f09446..01e016f1 100644 --- a/docs/source/modules/benchmark_tutorial.rst +++ b/docs/source/modules/benchmark_tutorial.rst @@ -208,7 +208,7 @@ For example: def __call__(self, candidate: ArtificialSubject) -> Score: candidate.start_behavioral_task(ArtificialSubject.Task.reading_times) # or any other task - # or e.g. candidate.start_start_recording(recording_target=ArtificialSubject.RecordingTarget.language_system, + # or e.g. candidate.start_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system, # recording_type=ArtificialSubject.RecordingType.fMRI) predictions = candidate.digest_text(stimuli)['behavior'] raw_score = self.metric(predictions, self.data) diff --git a/docs2/source/modules/plugins.rst b/docs/source/modules/plugins.rst similarity index 100% rename from docs2/source/modules/plugins.rst rename to docs/source/modules/plugins.rst diff --git a/docs2/Makefile b/docs2/Makefile deleted file mode 100644 index 92dd33a1..00000000 --- a/docs2/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs2/document_plugins.py b/docs2/document_plugins.py deleted file mode 100644 index 795e50e5..00000000 --- a/docs2/document_plugins.py +++ /dev/null @@ -1,157 +0,0 @@ -""" Make plugin details available to readthedocs """ - -import json -import logging -from pathlib import Path -import re -from rstcloth import RstCloth -from typing import Dict, List, Union - -# BIBS_DIR = 'docs/source/bibtex/' -BIBS_DIR = 'source/bibtex/' -# PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst' -PLUGINS_DOC_FP = 'source/modules/plugins2.rst' -PLUGINS_LIST_FP = '../brainscore_language/plugin_management/all_plugins.json' -PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models'] -PLUGIN_TYPE_MAP = {plugin_dirtype:plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS} - - -def _get_module_plugin_names(plugin_type:str, plugin_dir:Path) -> List[str]: - """ Returns list of plugins registered by module """ - init_fp = plugin_dir / "__init__.py" - registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry" - - with open(init_fp, 'r') as f: - text = f.read() - registered_plugins = re.findall(registry+'\[(.*)\]', text) - cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins] - - return cleaned_plugin_names - -def _id_from_bibtex(bibtex:str) -> str: - """ Returns BibTeX identifier from BibTeX """ - return re.search('\{(.*?),', bibtex).group(1) - -def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]: - """Add all plugins to respective type registries - - Returns a dict where key is plugin type, - value is a dict where key is name of plugin dir, - value is a dict of plugin info: - - plugin_names: list of names of all plugins registered by module - bibtex: a BibTeX string - bibtex_id: BibTeX identifier - """ - all_plugin_info = {} - for plugin_type in PLUGIN_DIRS: - # plugins_dir = Path(Path(__file__).parents[1], plugin_type) - plugins_dir = Path(Path(__file__).resolve().parents[1], 'brainscore_language', plugin_type) - print('plugins_dir', plugins_dir.resolve()) - for plugin_dir in plugins_dir.glob('[!._]*'): - idx = str(plugin_dir).index('brainscore_language') - # plugin_dirname = str(plugin_dir) - plugin_dirname = str(plugin_dir)[idx:] - # print(plugin_dirname) - - if plugin_type not in all_plugin_info: - all_plugin_info[plugin_type] = {plugin_dirname:{}} - else: - all_plugin_info[plugin_type].update({plugin_dirname:{}}) - - plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname] - - plugin_names = _get_module_plugin_names(plugin_type, plugin_dir) - plugin_module_path = plugin_dirname.replace('/', '.') - plugin_module = __import__(plugin_dirname, fromlist=['BIBTEX']) - # print('plugin_module', plugin_module) - - plugin_dir_dict['plugin_names'] = plugin_names - if hasattr(plugin_module, 'BIBTEX'): - plugin_dir_dict['bibtex'] = plugin_module.BIBTEX - plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX) - - return all_plugin_info - -def _remove_duplicate_bibs(plugins_with_bibtex=Dict[str, Dict]): - """ Returns list of unique BibTeX to add """ - bibtex_data = {v['bibtex_id']:v['bibtex'] for v in plugins_with_bibtex.values()} - alphabetized_bibtex = dict(sorted(bibtex_data.items())) - deduped_bibtex = list(alphabetized_bibtex.values()) - - return deduped_bibtex - -def _record_bibtex(bibtex_to_add:List[str], plugins_bib_fp:str): - """ insert new BibTeX into respective .bib files """ - print('recording bibtex', Path(BIBS_DIR).resolve()) 
- if not Path(BIBS_DIR).exists(): - Path(BIBS_DIR).mkdir(parents=True) - print('plugins_bib_fp', plugins_bib_fp) - with open(plugins_bib_fp, "w+") as f: - for bibtex in bibtex_to_add: - f.write(bibtex) - f.write('\n') - -def create_bibfile(plugins=Dict[str, Dict], plugin_type='refs'): - """ For all plugins, add bibtex (if present) to .bib files """ - if plugin_type == 'refs': - plugins = dict(ele for sub in plugins.values() for ele in sub.items()) - # drop plugins without bibtex - plugins_with_bibtex = {k:v for k,v in plugins.items() if 'bibtex' in v.keys()} - if len(plugins_with_bibtex.keys()) > 0: - plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib') - # add bibtex (if present) to .bib files - bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex) - _record_bibtex(bibtex_to_add, plugins_bib_fp) - -def _prepare_content(all_plugin_info:Dict[str, Dict]) -> Dict[str, Dict]: - """Converts plugin information into rst format - - Returns a dict where key is plugin type, value is a dict - of plugin names (str) mapped to a dict of their info - - NOTE: info is currently plugin directory paths and BiBTeX citations, - but could expand to e.g. include description of plugin - """ - prepared_plugin_info = {} - for plugin_type in all_plugin_info: - plugin_type_title = plugin_type.capitalize() - prepared_plugin_info[plugin_type_title] = {name:{'dirname':k, - 'citation':(':cite:label:`' + v['bibtex_id'] +'`' - if 'bibtex_id' in v.keys() else None)} - for k,v in all_plugin_info[plugin_type].items() - for name in v['plugin_names']} - return prepared_plugin_info - -def _write_to_rst(plugin_info:Dict[str,Dict]): - """ Writes plugin info to readthedocs plugins.rst """ - print(PLUGINS_DOC_FP) - with open(PLUGINS_DOC_FP, 'w+') as f: - doc = RstCloth(f) - doc.ref_target(name="plugins") - doc.newline() - doc.title('Plugins') - doc.newline() - for plugin_type in plugin_info: - doc.h3(plugin_type) - for plugin in plugin_info[plugin_type]: - doc.h4(plugin) - doc.content(plugin_info[plugin_type][plugin]['dirname']) - doc.newline() - if plugin_info[plugin_type][plugin]['citation']: - doc.content(plugin_info[plugin_type][plugin]['citation']) - doc.newline() - doc.h2('Bibliography') - doc.directive(name="bibliography", fields=[('all','')]) - -def update_readthedocs(all_plugin_info:Dict[str,Dict]): - """ For all plugins, add name and info to readthedocs (plugins.rst) """ - prepared_plugin_info = _prepare_content(all_plugin_info) # rst formatting - _write_to_rst(prepared_plugin_info) - -if __name__ == '__main__': - all_plugin_info = get_all_plugin_info() - for plugin_type in all_plugin_info: - create_bibfile(all_plugin_info[plugin_type], plugin_type) # plugin type .bib file - create_bibfile(all_plugin_info) # one .bib file to rule them all - update_readthedocs(all_plugin_info) diff --git a/docs2/requirements.txt b/docs2/requirements.txt deleted file mode 100644 index b896045d..00000000 --- a/docs2/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ --e . - -Sphinx>=4 -sphinx_rtd_theme -sphinxcontrib-bibtex diff --git a/docs2/source/conf.py b/docs2/source/conf.py deleted file mode 100644 index ddc40a5b..00000000 --- a/docs2/source/conf.py +++ /dev/null @@ -1,86 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. 
For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - -# -- Project information ----------------------------------------------------- - -project = 'Brain-Score Language' -copyright = '2022, Brain-Score Team' -author = 'Brain-Score Team' - -# The full version, including alpha/beta/rc tags -release = '0.1' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'recommonmark', - 'sphinx.ext.viewcode', - 'sphinx_rtd_theme', - 'sphinxcontrib.bibtex', - 'rstcloth', - 'sphinx.ext.autosectionlabel' -] -autosummary_generate = True # Turn on sphinx.ext.autosummary - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -source_suffix = ['.rst', '.md'] - -# The master toctree document. -master_doc = 'index' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - -# allows BibTeX citations to be inserted into docs -bibtex_bibfiles = ['bibtex/refs.bib'] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" -html_style = "overrides.css" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] - - -def skip(app, what, name, obj, would_skip, options): - if name in ("__init__", "__call__"): - return False - if name.startswith('test') or name.startswith('Test'): - return False - return would_skip - - -def setup(app): - app.connect("autodoc-skip-member", skip) diff --git a/docs2/source/document_plugins.py b/docs2/source/document_plugins.py deleted file mode 100644 index c51016a0..00000000 --- a/docs2/source/document_plugins.py +++ /dev/null @@ -1,146 +0,0 @@ -""" Make plugin details available to readthedocs """ - -import json -import logging -from pathlib import Path -import re -from rstcloth import RstCloth -from typing import Dict, List, Union - -BIBS_DIR = 'docs/source/bibtex/' -PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst' -PLUGINS_LIST_FP = 'brainscore_language/plugin_management/all_plugins.json' -PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models'] -PLUGIN_TYPE_MAP = {plugin_dirtype:plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS} - - -def _get_module_plugin_names(plugin_type:str, plugin_dir:Path) -> List[str]: - """ Returns list of plugins registered by module """ - init_fp = plugin_dir / "__init__.py" - registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry" - - with open(init_fp, 'r') as f: - text = f.read() - registered_plugins = re.findall(registry+'\[(.*)\]', text) - cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins] - - return cleaned_plugin_names - -def _id_from_bibtex(bibtex:str) -> str: - """ Returns BibTeX identifier from BibTeX """ - return re.search('\{(.*?),', bibtex).group(1) - -def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]: - """Add all plugins to respective type registries - - Returns a dict where key is plugin type, - value is a dict where key is name of plugin dir, - value is a dict of plugin info: - - plugin_names: list of names of all plugins registered by module - bibtex: a BibTeX string - bibtex_id: BibTeX identifier - """ - all_plugin_info = {} - for plugin_type in PLUGIN_DIRS: - plugins_dir = Path(Path(__file__).parents[1], plugin_type) - for plugin_dir in plugins_dir.glob('[!._]*'): - plugin_dirname = str(plugin_dir) - - if plugin_type not in all_plugin_info: - all_plugin_info[plugin_type] = {plugin_dirname:{}} - else: - all_plugin_info[plugin_type].update({plugin_dirname:{}}) - - plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname] - - plugin_names = _get_module_plugin_names(plugin_type, plugin_dir) - plugin_module_path = plugin_dirname.replace('/', '.') - plugin_module = __import__(plugin_module_path, fromlist=['BIBTEX']) - - plugin_dir_dict['plugin_names'] = plugin_names - if hasattr(plugin_module, 'BIBTEX'): - plugin_dir_dict['bibtex'] = plugin_module.BIBTEX - plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX) - - return all_plugin_info - -def _remove_duplicate_bibs(plugins_with_bibtex=Dict[str, Dict]): - """ Returns list of unique BibTeX to add """ - bibtex_data = {v['bibtex_id']:v['bibtex'] for v in plugins_with_bibtex.values()} - alphabetized_bibtex = dict(sorted(bibtex_data.items())) - deduped_bibtex = list(alphabetized_bibtex.values()) - - return deduped_bibtex - -def _record_bibtex(bibtex_to_add:List[str], plugins_bib_fp:str): - """ insert new BibTeX into respective .bib files """ - if not Path(BIBS_DIR).exists(): - Path(BIBS_DIR).mkdir(parents=True) - with open(plugins_bib_fp, "w+") as f: - for bibtex in bibtex_to_add: - f.write(bibtex) - f.write('\n') - -def 
create_bibfile(plugins=Dict[str, Dict], plugin_type='refs'): - """ For all plugins, add bibtex (if present) to .bib files """ - if plugin_type == 'refs': - plugins = dict(ele for sub in plugins.values() for ele in sub.items()) - # drop plugins without bibtex - plugins_with_bibtex = {k:v for k,v in plugins.items() if 'bibtex' in v.keys()} - if len(plugins_with_bibtex.keys()) > 0: - plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib') - # add bibtex (if present) to .bib files - bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex) - _record_bibtex(bibtex_to_add, plugins_bib_fp) - -def _prepare_content(all_plugin_info:Dict[str, Dict]) -> Dict[str, Dict]: - """Converts plugin information into rst format - - Returns a dict where key is plugin type, value is a dict - of plugin names (str) mapped to a dict of their info - - NOTE: info is currently plugin directory paths and BiBTeX citations, - but could expand to e.g. include description of plugin - """ - prepared_plugin_info = {} - for plugin_type in all_plugin_info: - plugin_type_title = plugin_type.capitalize() - prepared_plugin_info[plugin_type_title] = {name:{'dirname':k, - 'citation':(':cite:label:`' + v['bibtex_id'] +'`' - if 'bibtex_id' in v.keys() else None)} - for k,v in all_plugin_info[plugin_type].items() - for name in v['plugin_names']} - return prepared_plugin_info - -def _write_to_rst(plugin_info:Dict[str,Dict]): - """ Writes plugin info to readthedocs plugins.rst """ - with open(PLUGINS_DOC_FP, 'w+') as f: - doc = RstCloth(f) - doc.ref_target(name="plugins") - doc.newline() - doc.title('Plugins') - doc.newline() - for plugin_type in plugin_info: - doc.h3(plugin_type) - for plugin in plugin_info[plugin_type]: - doc.h4(plugin) - doc.content(plugin_info[plugin_type][plugin]['dirname']) - doc.newline() - if plugin_info[plugin_type][plugin]['citation']: - doc.content(plugin_info[plugin_type][plugin]['citation']) - doc.newline() - doc.h2('Bibliography') - doc.directive(name="bibliography", fields=[('all','')]) - -def update_readthedocs(all_plugin_info:Dict[str,Dict]): - """ For all plugins, add name and info to readthedocs (plugins.rst) """ - prepared_plugin_info = _prepare_content(all_plugin_info) # rst formatting - _write_to_rst(prepared_plugin_info) - -if __name__ == '__main__': - all_plugin_info = get_all_plugin_info() - for plugin_type in all_plugin_info: - create_bibfile(all_plugin_info[plugin_type], plugin_type) # plugin type .bib file - create_bibfile(all_plugin_info) # one .bib file to rule them all - update_readthedocs(all_plugin_info) diff --git a/docs2/source/examples.md b/docs2/source/examples.md deleted file mode 100644 index 6fa53604..00000000 --- a/docs2/source/examples.md +++ /dev/null @@ -1 +0,0 @@ -../../examples/README.md \ No newline at end of file diff --git a/docs2/source/index.rst b/docs2/source/index.rst deleted file mode 100644 index 87594746..00000000 --- a/docs2/source/index.rst +++ /dev/null @@ -1,28 +0,0 @@ -Brain-Score Language -==================== - -Brain-Score is a collection of benchmarks and models: -benchmarks combine neural/behavioral data with a metric to score models on their alignment to humans, -and models are evaluated as computational hypotheses of human brain processing. - -The Brain-Score Language library contains -benchmarks that can easily be used to test language models on their alignment to human behavioral and internal brain -processing, -as well as language models that can easily be tested on new behavioral or neural data. 
-This makes experimental data accessible to modelers, and computational models accessible to experimenters, -accelerating progress in discovering ever-more-accurate models of the human brain and mind. - -The `score` function is the primary entry point to score a model on a benchmark. - -.. autofunction:: brainscore_language.score - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - modules/artificial_subject - modules/model_tutorial - modules/benchmark_tutorial - examples - modules/api_reference - modules/plugins diff --git a/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst b/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst deleted file mode 100644 index 7b19b2a1..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.artificial_subject.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.artificial\_subject -======================================== - -.. automodule:: brainscore_language.artificial_subject - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - - ArtificialSubject - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst b/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst deleted file mode 100644 index f4f5eb59..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.benchmark_helpers.rst +++ /dev/null @@ -1,31 +0,0 @@ -brainscore\_language.benchmark\_helpers -======================================= - -.. automodule:: brainscore_language.benchmark_helpers - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - _coords_match - ci_error - manual_merge - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst b/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst deleted file mode 100644 index 4311171c..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.document_plugins.rst +++ /dev/null @@ -1,38 +0,0 @@ -brainscore\_language.document\_plugins -====================================== - -.. automodule:: brainscore_language.document_plugins - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - _data_metric_from_benchmark - _get_module_plugin_names - _id_from_bibtex - _prepare_content - _record_bibtex - _remove_duplicate_bibs - _write_to_rst - create_bibfile - get_all_plugin_info - update_readthedocs - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst deleted file mode 100644 index 192fdd62..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.container.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.model\_helpers.container -============================================= - -.. automodule:: brainscore_language.model_helpers.container - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - - ContainerSubject - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst deleted file mode 100644 index ef6e0d31..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.embedding.rst +++ /dev/null @@ -1,38 +0,0 @@ -brainscore\_language.model\_helpers.embedding -============================================= - -.. 
automodule:: brainscore_language.model_helpers.embedding - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - mean_over_words - remove_punctuation - - - - - - .. rubric:: Classes - - .. autosummary:: - - EmbeddingSubject - GensimKeyedVectorsSubject - _GensimLookup - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst deleted file mode 100644 index 645b5417..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.huggingface.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.model\_helpers.huggingface -=============================================== - -.. automodule:: brainscore_language.model_helpers.huggingface - - - - - - - - - - - - .. rubric:: Classes - - .. autosummary:: - - HuggingfaceSubject - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst deleted file mode 100644 index da22d03a..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.preprocessing.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.model\_helpers.preprocessing -================================================= - -.. automodule:: brainscore_language.model_helpers.preprocessing - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - prepare_context - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst b/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst deleted file mode 100644 index 6b72746e..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.model_helpers.rst +++ /dev/null @@ -1,34 +0,0 @@ -brainscore\_language.model\_helpers -=================================== - -.. automodule:: brainscore_language.model_helpers - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - brainscore_language.model_helpers.container - brainscore_language.model_helpers.embedding - brainscore_language.model_helpers.huggingface - brainscore_language.model_helpers.preprocessing - diff --git a/docs2/source/modules/_autosummary/brainscore_language.parser.rst b/docs2/source/modules/_autosummary/brainscore_language.parser.rst deleted file mode 100644 index 5f95208e..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.parser.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.parser -=========================== - -.. automodule:: brainscore_language.parser - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - parse - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.rst b/docs2/source/modules/_autosummary/brainscore_language.rst deleted file mode 100644 index 383aba0e..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.rst +++ /dev/null @@ -1,56 +0,0 @@ -brainscore\_language -==================== - -.. automodule:: brainscore_language - - - - .. rubric:: Module Attributes - - .. autosummary:: - - data_registry - metric_registry - benchmark_registry - model_registry - - - - - - .. rubric:: Functions - - .. autosummary:: - - _run_score - load_benchmark - load_dataset - load_metric - load_model - score - - - - - - - - - - - - - -.. rubric:: Modules - -.. 
autosummary:: - :toctree: - :recursive: - - brainscore_language.artificial_subject - brainscore_language.benchmark_helpers - brainscore_language.document_plugins - brainscore_language.model_helpers - brainscore_language.submission - brainscore_language.utils - diff --git a/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst b/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst deleted file mode 100644 index 6af546cc..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.submission.config.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.submission.config -====================================== - -.. automodule:: brainscore_language.submission.config - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - get_database_secret - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.submission.rst b/docs2/source/modules/_autosummary/brainscore_language.submission.rst deleted file mode 100644 index 3dc7231e..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.submission.rst +++ /dev/null @@ -1,32 +0,0 @@ -brainscore\_language.submission -=============================== - -.. automodule:: brainscore_language.submission - - - - - - - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - brainscore_language.submission.config - brainscore_language.submission.endpoints - diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst deleted file mode 100644 index 6983b24d..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.utils.ceiling.rst +++ /dev/null @@ -1,29 +0,0 @@ -brainscore\_language.utils.ceiling -================================== - -.. automodule:: brainscore_language.utils.ceiling - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - ceiling_normalize - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.rst deleted file mode 100644 index deeb61b8..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.utils.rst +++ /dev/null @@ -1,41 +0,0 @@ -brainscore\_language.utils -========================== - -.. automodule:: brainscore_language.utils - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - attach_presentation_meta - fullname - - - - - - - - - - - - - -.. rubric:: Modules - -.. autosummary:: - :toctree: - :recursive: - - brainscore_language.utils.ceiling - brainscore_language.utils.s3 - brainscore_language.utils.transformations - brainscore_language.utils.xarray - diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst deleted file mode 100644 index 7a9f7169..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.utils.s3.rst +++ /dev/null @@ -1,30 +0,0 @@ -brainscore\_language.utils.s3 -============================= - -.. automodule:: brainscore_language.utils.s3 - - - - - - - - .. rubric:: Functions - - .. 
autosummary:: - - load_from_s3 - upload_data_assembly - - - - - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst deleted file mode 100644 index ad8e05bb..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.utils.transformations.rst +++ /dev/null @@ -1,43 +0,0 @@ -brainscore\_language.utils.transformations -========================================== - -.. automodule:: brainscore_language.utils.transformations - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - apply_aggregate - enumerate_done - extract_coord - standard_error_of_the_mean - - - - - - .. rubric:: Classes - - .. autosummary:: - - CrossValidation - CrossValidationSingle - Split - TestOnlyCrossValidation - TestOnlyCrossValidationSingle - Transformation - - - - - - - - - diff --git a/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst b/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst deleted file mode 100644 index 7725655a..00000000 --- a/docs2/source/modules/_autosummary/brainscore_language.utils.xarray.rst +++ /dev/null @@ -1,31 +0,0 @@ -brainscore\_language.utils.xarray -================================= - -.. automodule:: brainscore_language.utils.xarray - - - - - - - - .. rubric:: Functions - - .. autosummary:: - - collapse_multidim_coord - copy_metadata - fix_xr_dtypes - - - - - - - - - - - - - diff --git a/docs2/source/modules/api_reference.rst b/docs2/source/modules/api_reference.rst deleted file mode 100644 index b87217db..00000000 --- a/docs2/source/modules/api_reference.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _api_reference: - -API Reference -------------- - -.. autosummary:: - :toctree: _autosummary - :recursive: - - brainscore_language diff --git a/docs2/source/modules/artificial_subject.rst b/docs2/source/modules/artificial_subject.rst deleted file mode 100644 index 9cb4e82a..00000000 --- a/docs2/source/modules/artificial_subject.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _artificial_subject: - -********************************* -ArtificialSubject model interface -********************************* - -.. autoclass:: brainscore_language.artificial_subject.ArtificialSubject - :members: - :undoc-members: diff --git a/docs2/source/modules/benchmark_tutorial.rst b/docs2/source/modules/benchmark_tutorial.rst deleted file mode 100644 index 3f7e3946..00000000 --- a/docs2/source/modules/benchmark_tutorial.rst +++ /dev/null @@ -1,364 +0,0 @@ -.. _new_benchmark_tutorial: - -********************** -New Benchmark Tutorial -********************** - -This example walks through adding a new benchmark and scoring existing models on it. -Everything can be developed locally with full access to publicly available models, -but we strongly encourage you to submit your benchmark to Brain-Score to make it accessible to the community, -and to make it into a goalpost that future models can be measured against. - -If you haven't already, check out -`other benchmarks `_ -and the `docs `_. - -A benchmark reproduces the experimental paradigm on a model candidate, -and tests model predictions against the experimentally observed data, -using a similarity metric. - -In other words, a benchmark consists of three things (each of which is a plugin): - -1. experimental paradigm -2. biological data (neural/behavioral) -3. similarity metric - -For the biological data and the similarity metric, benchmarks can use previously submitted data and metrics. 
-I.e., re-combinations are very much valid. - -Brain-Score secondarily also hosts benchmarks that do not pertain to neural or behavioral data, -e.g. engineering (ML) benchmarks and other analyses. These benchmarks do not include biological data, -and the metric might be ground-truth accuracy. - - -1. Package data (optional) -========================== - -You can contribute new data by submitting a data plugin. -If you are building a benchmark using existing data, you can skip this step. - -We use the `BrainIO `_ format to organize data. -Datasets in brainio are called *assemblies* and are based on `xarray `_, -a multi-dimensional version of pandas, which allows for metadata on numpy arrays of arbitrary dimensionality. - -Most assemblies contain a :code:`presentation` dimension for the stimuli that were presented, as well as potentially other -dimensions for e.g. different subjects or different voxels. -The actual measurements (e.g. reading times, or voxel activity) are typically the values of an assembly. - -Behavioral data ---------------- - -The following is an excerpt from the -`Futrell2018 data packaging `_. - -.. code-block:: python - - from brainio.assemblies import BehavioralAssembly - - reading_times = parse_experiment_data(...) # load the experimental data, e.g. from .csv files - # ... obtain as much metadata as we can ... - - assembly = BehavioralAssembly(reading_times, coords={ - 'word': ('presentation', voc_word), - 'stimulus_id': ('presentation', stimulus_ID), - ... - 'subject_id': ('subject', subjects), - 'WorkTimeInSeconds': ('subject', WorkTimeInSeconds_meta), - ... - }, dims=('presentation', 'subject')) - -Neural data ------------ - -The following is an excerpt from the -`Pereira2018 data packaging `_. - -.. code-block:: python - - from brainio.assemblies import NeuroidAssembly - - neural_recordings = parse_experiment_data(...) # load the experimental data, e.g. from .mat files - # ... obtain as much metadata as we can ... - - assembly = NeuroidAssembly(neural_recordings, coords={ - 'stimulus': ('presentation', sentences), - 'stimulus_id': ('presentation', stimulus_id), - ... - 'neuroid_id': ('neuroid', voxel_number), - 'atlas': ('neuroid', atlases), - ... - }, dims=['presentation', 'neuroid']) - -Register the data plugin ------------------------- - -So that your data can be accessed via an identifier, you need to define an endpoint in the plugin registry. - -For instance, if your data is on S3, the plugin might look as follows: - -.. code-block:: python - - from brainscore_language.utils.s3 import load_from_s3 - - def load_assembly() -> BehavioralAssembly: - assembly = load_from_s3( - identifier="Futrell2018", - version_id="MpR.gIXN8UrUnqwQyj.kCrh4VWrBvsGf", - sha1="381ccc8038fbdb31235b5f3e1d350f359b5e287f") - return assembly - - data_registry['Futrell2018'] = load_assembly - -Unit tests ----------- - -To ensure the data is in the right format, and not corrupted by any future changes, we require all plugins to include -an accompanying :code:`test.py` file with unit tests. - -For instance, here is a small unit test example validating the dimensions of a reading times dataset. - - -.. code-block:: python - - from brainscore_language import load_dataset - - def test_shape(self): - assembly = load_dataset('Futrell2018') - assert len(assembly['presentation']) == 10256 - assert len(assembly['subject']) == 180 - -These unit tests guarantee the continued validity of your plugin, so we encourage rigorous testing methods. - - -2. 
-
-
-2. Create metric (optional)
-===========================
-
-You can contribute a new metric by submitting a metric plugin.
-If you are building a benchmark using an existing metric, you can skip this step.
-
-Metrics compute the similarity between two measurements.
-These can be model-vs-human, human-vs-human, or model-vs-model.
-Measurements could, for instance, be reading times or fMRI recordings.
-
-A simple metric could be the Pearson correlation of two measurements:
-
-.. code-block:: python
-
-    import numpy as np
-    from scipy.stats import pearsonr
-    from brainio.assemblies import DataAssembly
-    from brainscore_core.metrics import Metric, Score
-
-    class PearsonCorrelation(Metric):
-        def __call__(self, assembly1: DataAssembly, assembly2: DataAssembly) -> Score:
-            rvalue, pvalue = pearsonr(assembly1, assembly2)
-            score = Score(np.abs(rvalue))  # similarity score between 0 and 1 indicating alignment of the two assemblies
-            return score
-
-    metric_registry['pearsonr'] = PearsonCorrelation
-
-This is a deliberately simple example: it ignores, e.g., checks that the two assemblies are ordered identically,
-cross-validation, and the tracking of metadata.
-
-Unit tests
-----------
-
-As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your metric.
-For instance, the following is an excerpt from the
-`Pearson correlation tests `_.
-
-.. code-block:: python
-
-    from pytest import approx
-
-    from brainscore_language import load_metric
-
-    def test_weak_correlation():
-        a1 = [1, 2, 3, 4, 5]
-        a2 = [3, 1, 6, 1, 2]
-        metric = load_metric('pearsonr')
-        score = metric(a1, a2)
-        assert score == approx(.152, abs=.005)
-
-
-3. Build the benchmark
-======================
-
-With data and metric in place, you can put the two together to build a benchmark that scores model similarity to
-behavioral or neural measurements.
-
-Structure
----------
-
-A benchmark runs the experiment on a (model) subject candidate in the :code:`__call__` method,
-and compares model predictions against experimental data.
-All interactions with the model are via methods defined in the :doc:`ArtificialSubject <./artificial_subject>` interface
--- this allows all present and future models to be tested on your benchmark.
-
-For example:
-
-.. code-block:: python
-
-    from brainscore_core.benchmarks import BenchmarkBase
-    from brainscore_language import load_dataset, load_metric, ArtificialSubject
-
-    class MyBenchmark(BenchmarkBase):
-        def __init__(self):
-            self.data = load_dataset('mydata')
-            self.metric = load_metric('pearsonr')
-            ...
-
-        def __call__(self, candidate: ArtificialSubject) -> Score:
-            candidate.perform_behavioral_task(ArtificialSubject.Task.reading_times)  # or any other task
-            # or e.g. candidate.perform_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
-            #                                            recording_type=ArtificialSubject.RecordingType.fMRI)
-            stimuli = self.data['stimulus'].values  # the dataset's stimuli, as plain values
-            predictions = candidate.digest_text(stimuli)['behavior']
-            raw_score = self.metric(predictions, self.data)
-            score = ceiling_normalize(raw_score, self.ceiling)
-            return score
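-
-Like data and metrics, benchmarks are made available through a registry. A minimal sketch for the
-:code:`MyBenchmark` class above (the identifier is illustrative; real identifiers typically combine data and
-metric, as in the registrations shown below):
-
-.. code-block:: python
-
-    from brainscore_language import benchmark_registry
-
-    benchmark_registry['mydata-pearsonr'] = MyBenchmark  # hypothetical identifier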
-
-
-Behavioral benchmark
---------------------
-
-To test for behavioral alignment, benchmarks compare model outputs to human behavioral measurements.
-The model is instructed to perform a certain task (e.g. output reading times), and then prompted to digest text input,
-for which it will output behavioral predictions.
-
-For instance, here is a sample excerpt from the
-`Futrell2018 benchmark `_
-comparing reading times:
-
-.. code-block:: python
-
-    class Futrell2018Pearsonr(BenchmarkBase):
-        ...
-
-        def __call__(self, candidate: ArtificialSubject) -> Score:
-            candidate.perform_behavioral_task(ArtificialSubject.Task.reading_times)
-            stimuli = self.data['stimulus']
-            predictions = candidate.digest_text(stimuli.values)['behavior']
-            raw_score = self.metric(predictions, self.data)
-            score = ceiling_normalize(raw_score, self.ceiling)
-            return score
-
-    benchmark_registry['Futrell2018-pearsonr'] = Futrell2018Pearsonr
-
-Neural benchmark
-----------------
-
-To test for neural alignment, benchmarks compare model internals to human internal neural activity,
-measured, e.g., via fMRI or ECoG.
-Running the experiment on the model subject, the benchmark first instructs where and how to perform neural recording,
-and then prompts the subject with text input, for which the model will output neural predictions.
-
-For instance, here is a sample excerpt from the
-`Pereira2018 linear-predictivity benchmark `_
-linearly comparing fMRI activity:
-
-.. code-block:: python
-
-    class Pereira2018Linear(BenchmarkBase):
-        ...
-
-        def __call__(self, candidate: ArtificialSubject) -> Score:
-            candidate.perform_neural_recording(recording_target=ArtificialSubject.RecordingTarget.language_system,
-                                               recording_type=ArtificialSubject.RecordingType.fMRI)
-            stimuli = self.data['stimulus']
-            predictions = candidate.digest_text(stimuli.values)['neural']
-            raw_score = self.metric(predictions, self.data)
-            score = ceiling_normalize(raw_score, self.ceiling)
-            return score
-
-    benchmark_registry['Pereira2018-linear'] = Pereira2018Linear
-
-Ceiling
--------
-
-You might have noticed that model alignment scores are always relative to a ceiling.
-The ceiling is an estimate of how well the "perfect model" would perform.
-Often, this is an estimate of how well an average human is aligned to the specific data.
-
-For instance, the `Pereira2018 ceiling `_
-compares the linear alignment (i.e. using the same metric) of n-1 subjects to a held-out subject.
-The `Futrell2018 ceiling `_
-compares how well one half of the subjects is aligned to the other half,
-again using the same metric that is used for model comparisons.
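-
-To make the idea concrete, here is a toy sketch of a split-half ceiling estimate. This is not the actual
-implementation of either ceiling above; the function name and details are illustrative:
-
-.. code-block:: python
-
-    import numpy as np
-    from scipy.stats import pearsonr
-
-    def split_half_ceiling(subject_data, n_splits=10, seed=0):
-        # subject_data: array of shape (n_subjects, n_stimuli)
-        rng = np.random.RandomState(seed)
-        scores = []
-        for _ in range(n_splits):
-            # randomly split subjects into two halves
-            order = rng.permutation(len(subject_data))
-            half1, half2 = order[:len(order) // 2], order[len(order) // 2:]
-            # correlate the mean response of one half with the mean response of the other
-            rvalue, _ = pearsonr(subject_data[half1].mean(axis=0),
-                                 subject_data[half2].mean(axis=0))
-            scores.append(abs(rvalue))
-        return np.median(scores)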
-
-
-Running models on your benchmark
---------------------------------
-
-You can now locally run models on your benchmark
-(see `4. Submit to Brain-Score`_ for running models on the Brain-Score platform).
-Run the `score function `_,
-passing in the desired model identifier(s) and the identifier for your benchmark.
-
-For instance, you might run:
-
-.. code-block:: python
-
-    from brainscore_language import score
-
-    model_score = score(model_identifier='distilgpt2', benchmark_identifier='benchmarkid-metricid')
-
-Unit tests
-----------
-
-As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your benchmark.
-For instance, the following is an excerpt from the
-`Futrell2018 tests `_:
-
-.. code-block:: python
-
-    import numpy as np
-    from numpy.random import RandomState
-    from pytest import approx
-
-    from brainio.assemblies import BehavioralAssembly
-    from brainscore_language import ArtificialSubject, load_benchmark
-
-    class DummyModel(ArtificialSubject):
-        def __init__(self, reading_times):
-            self.reading_times = reading_times
-
-        def digest_text(self, stimuli):
-            return {'behavior': BehavioralAssembly(self.reading_times, coords={
-                'context': ('presentation', stimuli),
-                'stimulus_id': ('presentation', np.arange(len(stimuli)))},
-                dims=['presentation'])}
-
-        def perform_behavioral_task(self, task: ArtificialSubject.Task):
-            if task != ArtificialSubject.Task.reading_times:
-                raise NotImplementedError()
-
-    def test_dummy_bad():
-        benchmark = load_benchmark('Futrell2018-pearsonr')
-        reading_times = RandomState(0).random(10256)
-        dummy_model = DummyModel(reading_times=reading_times)
-        score = benchmark(dummy_model)
-        assert score == approx(0.0098731 / .858, abs=0.001)
-
-    def test_ceiling():
-        benchmark = load_benchmark('Futrell2018-pearsonr')
-        ceiling = benchmark.ceiling
-        assert ceiling == approx(.858, abs=.0005)
-        assert ceiling.raw.median('split') == ceiling
-        assert ceiling.uncorrected_consistencies.median('split') < ceiling
-
-4. Submit to Brain-Score
-========================
-
-To share your plugins (data, metrics, and/or benchmarks) with the community
-and to make them accessible for continued model evaluation,
-please submit them to the platform.
-
-There are two main ways to do that:
-
-1. By uploading a zip file on the website
-2. By submitting a GitHub pull request with the proposed changes
-
-Both options result in the same outcome: your plugin will automatically be tested,
-and added to the codebase after it passes tests.
-
-Particulars on data
--------------------
-
-To make data assemblies accessible for Brain-Score model evaluations, they need to be uploaded.
-You can self-host your data (e.g. on S3/OSF), or contact us to host your data on S3.
-You can also choose to keep your data private such that models can be scored, but the data cannot be accessed.
-
-For uploading data to S3, see :code:`upload_data_assembly`
-in `utils/s3 `_.
diff --git a/docs2/source/modules/model_tutorial.rst b/docs2/source/modules/model_tutorial.rst
deleted file mode 100644
index 40da77b2..00000000
--- a/docs2/source/modules/model_tutorial.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-.. _new_model_tutorial:
-
-******************
-New Model Tutorial
-******************
-
-This example walks through adding a new model and scoring it on existing benchmarks.
-Everything can be developed locally with full access to publicly available benchmarks,
-but we strongly encourage you to submit your model to Brain-Score to make it accessible to the community,
-and to make it testable on future benchmarks.
-
-If you haven't already, check out
-`other models `_
-and the `docs `_.
-
-
-Adding the model plugin
-=======================
-
-We require models to implement the :doc:`ArtificialSubject API <./artificial_subject>`.
-This interface is the central communication point between models and benchmarks
-and guarantees that your model can be evaluated on all available benchmarks.
-It includes three central methods that set the model up for performing a behavioral task,
-for performing neural recordings, and for digesting text with behavioral and/or neural outputs.
-A model does not have to implement all three methods; it can, for instance, engage only on behavior,
-or only on neural recordings, as sketched below.
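-
-The following is a minimal sketch of such a behavior-only subject, mirroring the :code:`DummyModel` used in the
-benchmark tutorial's tests (the class name and the constant reading time are illustrative):
-
-.. code-block:: python
-
-    from brainio.assemblies import BehavioralAssembly
-    from brainscore_language import ArtificialSubject
-
-    class ConstantReadingTimes(ArtificialSubject):  # hypothetical toy subject
-        def perform_behavioral_task(self, task: ArtificialSubject.Task):
-            if task != ArtificialSubject.Task.reading_times:
-                raise NotImplementedError()  # behavior only; no other tasks, no neural recordings
-
-        def digest_text(self, stimuli):
-            reading_times = [300.0] * len(stimuli)  # a constant reading time per stimulus
-            return {'behavior': BehavioralAssembly(reading_times, coords={
-                'context': ('presentation', list(stimuli)),
-                'stimulus_id': ('presentation', list(range(len(stimuli))))},
-                dims=['presentation'])}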
-
-HuggingFace models
-------------------
-
-For models on HuggingFace, we provide a simple :code:`HuggingfaceSubject` wrapper that lets you instantiate models in
-very few lines of code, since the wrapper takes care of implementing
-the :doc:`ArtificialSubject <./artificial_subject>` interface.
-The main choice you will have to make is which layer corresponds to which brain region.
-For instance, the following is an excerpt from adding
-`gpt models `_:
-
-.. code-block:: python
-
-    from brainscore_language import ArtificialSubject, model_registry
-    from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
-
-    model_registry['distilgpt2'] = lambda: HuggingfaceSubject(model_id='distilgpt2', region_layer_mapping={
-        ArtificialSubject.RecordingTarget.language_system: 'transformer.h.5.mlp.dropout'})
-
-Unit tests
-----------
-
-As with all plugins, please provide a :code:`test.py` file to ensure the continued validity of your model.
-For instance, the following is an excerpt from the
-`tests for gpt2-xl `_:
-
-.. code-block:: python
-
-    import numpy as np
-
-    from brainscore_language import ArtificialSubject, load_model
-
-    def test_next_word():
-        model = load_model('gpt2-xl')
-        text = ['the quick brown fox', 'jumps over', 'the lazy']
-        expected_next_words = ['jumps', 'the', 'dog']
-        model.perform_behavioral_task(task=ArtificialSubject.Task.next_word)
-        next_word_predictions = model.digest_text(text)['behavior']
-        np.testing.assert_array_equal(next_word_predictions, expected_next_words)
-
-
-Running your model on benchmarks
-================================
-
-You can now locally run your model on existing benchmarks
-(see `Submit to Brain-Score`_ for running models on the Brain-Score platform).
-Run the `score function `_,
-passing in the desired benchmark identifier(s) and the identifier for your model.
-
-For instance, you might run:
-
-.. code-block:: python
-
-    from brainscore_language import score
-
-    model_score = score(model_identifier='distilgpt2', benchmark_identifier='Futrell2018-pearsonr')
-
-
-Submit to Brain-Score
-=====================
-
-To share your model plugin with the community and to make it accessible for continued benchmark evaluation,
-please submit it to the platform.
-
-There are two main ways to do that:
-
-1. By uploading a zip file on the website
-2. By submitting a GitHub pull request with the proposed changes
-
-Both options result in the same outcome: your plugin will automatically be tested,
-and added to the codebase after it passes tests.
From 6dfcc6c9f19b466ceb9b940cfc96436ab8ed37ab Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 22 Jan 2024 18:07:18 -0500 Subject: [PATCH 04/12] moving rstcloth dependency to req.txt --- docs/requirements.txt | 3 ++- docs/source/conf.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index ab14128c..d384c309 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,5 @@ Sphinx>=4 sphinx_rtd_theme recommonmark -# sphinxcontrib-bibtex \ No newline at end of file +sphinxcontrib-bibtex +rstcloth \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index ddc40a5b..85143560 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -36,7 +36,6 @@ 'sphinx.ext.viewcode', 'sphinx_rtd_theme', 'sphinxcontrib.bibtex', - 'rstcloth', 'sphinx.ext.autosectionlabel' ] autosummary_generate = True # Turn on sphinx.ext.autosummary From 6c8eaea4600148ad70e235344b4751df1868b55e Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 5 Feb 2024 16:35:19 -0500 Subject: [PATCH 05/12] Adding sphinx _autosummary docs to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5f7bafda..e2634422 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ instance/ # Sphinx documentation docs/_build/ +docs/source/modules/_autosummary # PyBuilder target/ From 1289731cf773acd278634b9418a2b95feffb1f8b Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 5 Feb 2024 16:36:07 -0500 Subject: [PATCH 06/12] Removing unused CSS overrides for Sphinx --- docs/source/_static/overrides.css | 5 ----- docs/source/conf.py | 6 ------ 2 files changed, 11 deletions(-) delete mode 100644 docs/source/_static/overrides.css diff --git a/docs/source/_static/overrides.css b/docs/source/_static/overrides.css deleted file mode 100644 index b67302d2..00000000 --- a/docs/source/_static/overrides.css +++ /dev/null @@ -1,5 +0,0 @@ -@import url("sphinx_rtd_theme.css"); - -h6 { - font-size: 5px; -} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 85143560..9ced09c1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -65,12 +65,6 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" -html_style = "overrides.css" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] def skip(app, what, name, obj, would_skip, options): From b26ae67ada0d23ee189ca49204178dfd6f922dc3 Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 5 Feb 2024 16:38:28 -0500 Subject: [PATCH 07/12] refactored document_plugins to use brainscore_core (won't work until core updates are PRed) --- brainscore_language/document_plugins.py | 196 ------------------ brainscore_language/utils/document_plugins.py | 23 ++ 2 files changed, 23 insertions(+), 196 deletions(-) delete mode 100644 brainscore_language/document_plugins.py create mode 100644 brainscore_language/utils/document_plugins.py diff --git a/brainscore_language/document_plugins.py b/brainscore_language/document_plugins.py deleted file mode 100644 index 3cc13f1b..00000000 --- a/brainscore_language/document_plugins.py +++ /dev/null @@ -1,196 +0,0 @@ -""" Make plugin details available to readthedocs """ - -import json -import logging -from pathlib import Path -import re -from rstcloth import RstCloth -from typing import Dict, List, Union - -from brainscore_language import load_dataset, load_metric - -# BIBS_DIR = 'docs/source/bibtex/' -BIBS_DIR = '../docs2/source/bibtex/' -GITHUB_DIR = 'https://github.com/brain-score/language/tree/main/brainscore_language/' -# PLUGINS_DOC_FP = 'docs/source/modules/plugins.rst' -PLUGINS_DOC_FP = '../docs2/source/modules/plugins.rst' -PLUGINS_LIST_FP = 'brainscore_language/plugin_management/all_plugins.json' -PLUGIN_DIRS = ['benchmarks', 'data', 'metrics', 'models'] -PLUGIN_TYPE_MAP = {plugin_dirtype:plugin_dirtype.strip('s') for plugin_dirtype in PLUGIN_DIRS} - - -def _get_module_plugin_names(plugin_type:str, plugin_dir:Path) -> List[str]: - """ Returns list of plugins registered by module """ - init_fp = plugin_dir / "__init__.py" - registry = PLUGIN_TYPE_MAP[plugin_type] + "_registry" - - with open(init_fp, 'r') as f: - text = f.read() - registered_plugins = re.findall(registry+'\[(.*)\]', text) - cleaned_plugin_names = [name.replace('"', '').replace('\'', '') for name in registered_plugins] - - return cleaned_plugin_names - -def _id_from_bibtex(bibtex:str) -> str: - """ Returns BibTeX identifier from BibTeX """ - return re.search('\{(.*?),', bibtex).group(1) - -def _data_metric_from_benchmark(filepath): - data_pattern = 'self.data = load_data' - metric_pattern = 'self.metric = load_metric' - with open(filepath, 'r') as f: - content = f.read() - - data_match = re.search(r'load_dataset\([\'"]([^\'"]*)[\'"]\)', content) - metric_match = re.search(r'load_metric\([\'"]([^\'"]*)[\'"]\)', content) - - data_value = data_match.group(1) if data_match else None - metric_value = metric_match.group(1) if metric_match else None - - return data_value, metric_value - -def get_all_plugin_info() -> Dict[str, Dict[str, Dict[str, Union[list, str, str]]]]: - """Add all plugins to respective type registries - - Returns a dict where key is plugin type, - value is a dict where key is name of plugin dir, - value is a dict of plugin info: - - plugin_names: list of names of all plugins registered by module - bibtex: a BibTeX string - bibtex_id: BibTeX identifier - """ - all_plugin_info = {} - for plugin_type in PLUGIN_DIRS: - plugins_dir = Path(Path(__file__).parent, plugin_type) - for plugin_dir in plugins_dir.glob('[!._]*'): - plugin_dirname = str(plugin_dir) - - if plugin_type not in all_plugin_info: - all_plugin_info[plugin_type] = {plugin_dirname:{}} - else: - all_plugin_info[plugin_type].update({plugin_dirname:{}}) - - plugin_dir_dict = all_plugin_info[plugin_type][plugin_dirname] - - 
plugin_names = _get_module_plugin_names(plugin_type, plugin_dir) - plugin_module_path = plugin_dirname.replace('/', '.') - plugin_module = __import__(plugin_module_path, fromlist=['BIBTEX']) - - plugin_dir_dict['plugin_names'] = plugin_names - if hasattr(plugin_module, 'BIBTEX'): - plugin_dir_dict['bibtex'] = plugin_module.BIBTEX - plugin_dir_dict['bibtex_id'] = _id_from_bibtex(plugin_module.BIBTEX) - - if plugin_type == 'benchmarks': - data_value, metric_value = _data_metric_from_benchmark(plugin_dirname + '/benchmark.py') - if data_value: - plugin_dir_dict['data_value'] = data_value - if metric_value: - plugin_dir_dict['metric_value'] = metric_value - - return all_plugin_info - -def _remove_duplicate_bibs(plugins_with_bibtex=Dict[str, Dict]): - """ Returns list of unique BibTeX to add """ - bibtex_data = {v['bibtex_id']:v['bibtex'] for v in plugins_with_bibtex.values()} - alphabetized_bibtex = dict(sorted(bibtex_data.items())) - deduped_bibtex = list(alphabetized_bibtex.values()) - - return deduped_bibtex - -def _record_bibtex(bibtex_to_add:List[str], plugins_bib_fp:str): - """ insert new BibTeX into respective .bib files """ - if not Path(BIBS_DIR).exists(): - Path(BIBS_DIR).mkdir(parents=True) - with open(plugins_bib_fp, "w+") as f: - for bibtex in bibtex_to_add: - f.write(bibtex) - f.write('\n') - -def create_bibfile(plugins=Dict[str, Dict], plugin_type='refs'): - """ For all plugins, add bibtex (if present) to .bib files """ - if plugin_type == 'refs': - plugins = dict(ele for sub in plugins.values() for ele in sub.items()) - # drop plugins without bibtex - plugins_with_bibtex = {k:v for k,v in plugins.items() if 'bibtex' in v.keys()} - if len(plugins_with_bibtex.keys()) > 0: - plugins_bib_fp = Path(BIBS_DIR + plugin_type + '.bib') - # add bibtex (if present) to .bib files - bibtex_to_add = _remove_duplicate_bibs(plugins_with_bibtex) - _record_bibtex(bibtex_to_add, plugins_bib_fp) - -def _prepare_content(all_plugin_info:Dict[str, Dict]) -> Dict[str, Dict]: - """Converts plugin information into rst format - - Returns a dict where key is plugin type, value is a dict - of plugin names (str) mapped to a dict of their info - - NOTE: info is currently plugin directory paths and BiBTeX citations, - but could expand to e.g. include description of plugin - """ - prepared_plugin_info = {} - for plugin_type in all_plugin_info: - plugin_type_title = plugin_type.capitalize() - prepared_plugin_info[plugin_type_title] = {name:{'dirname':k, - 'citation':(':cite:label:`' + v['bibtex_id'] +'`' - if 'bibtex_id' in v.keys() else None), - 'data':(f":ref:`{v['data_value']} <{v['data_value']}>`" - if 'data_value' in v.keys() else None), - 'metric':(f":ref:`{v['metric_value']} <{v['metric_value']}>`" - if 'metric_value' in v.keys() else None)} - for k,v in all_plugin_info[plugin_type].items() - for name in v['plugin_names']} - return prepared_plugin_info - -def _write_to_rst(plugin_info:Dict[str,Dict]): - """ Writes plugin info to readthedocs plugins.rst """ - upper_levels = set() - with open(PLUGINS_DOC_FP, 'w+') as f: - doc = RstCloth(f) - doc.ref_target(name="plugins") - doc.newline() - doc.title('Plugins') - doc.newline() - for plugin_type in plugin_info: - doc.newline() - doc.h3(plugin_type) - doc.content(f'The following {plugin_type} are available. 
Plugin identifiers are grouped by directory, shown in bold.') - doc.newline() - for plugin in plugin_info[plugin_type]: - location = plugin_info[plugin_type][plugin]['dirname'] - location_link = f'`{location} <{GITHUB_DIR}{location}>`_' - if location not in upper_levels: - idx = location.index('/') - doc.h4(location[idx+1:].capitalize()) - doc.content(f'Location: {location_link}') - doc.newline() - upper_levels.add(location) - doc.newline() - doc.ref_target(plugin) - doc.h6(plugin) - doc.newline() - if plugin_info[plugin_type][plugin]['data']: - doc.content(f"Data: {plugin_info[plugin_type][plugin]['data']}", indent=2) - doc.newline() - if plugin_info[plugin_type][plugin]['metric']: - doc.content(f"Metric: {plugin_info[plugin_type][plugin]['metric']}", indent=2) - doc.newline() - if plugin_info[plugin_type][plugin]['citation']: - doc.content(f"Citation: {plugin_info[plugin_type][plugin]['citation']}", indent=2) - doc.newline() - doc.newline() - doc.h2('Bibliography') - doc.directive(name="bibliography", fields=[('all','')]) - -def update_readthedocs(all_plugin_info:Dict[str,Dict]): - """ For all plugins, add name and info to readthedocs (plugins.rst) """ - prepared_plugin_info = _prepare_content(all_plugin_info) # rst formatting - _write_to_rst(prepared_plugin_info) - -if __name__ == '__main__': - all_plugin_info = get_all_plugin_info() - for plugin_type in all_plugin_info: - create_bibfile(all_plugin_info[plugin_type], plugin_type) # plugin type .bib file - create_bibfile(all_plugin_info) # one .bib file to rule them all - update_readthedocs(all_plugin_info) diff --git a/brainscore_language/utils/document_plugins.py b/brainscore_language/utils/document_plugins.py new file mode 100644 index 00000000..bb49e8b6 --- /dev/null +++ b/brainscore_language/utils/document_plugins.py @@ -0,0 +1,23 @@ +""" Make plugin details available to readthedocs """ + +from pathlib import Path +from brainscore_core.plugin_management.plugin_utils import get_all_plugin_info +from brainscore_core.plugin_management.document_plugins import create_bibfile, update_readthedocs + + +BIBS_DIR = Path(Path(__file__).parents[2], 'docs', 'source', 'bibtex') +PLUGINS_DOC = Path(Path(__file__).parents[2], 'docs', 'source', 'modules') +GITHUB_DIR = 'https://github.com/brain-score/language/tree/main/brainscore_language/' + + +def update_docs(): + all_plugin_info = get_all_plugin_info(Path(__file__).parents[1]) + print(f"all_plugin_info: {all_plugin_info}") + for plugin_type in all_plugin_info: + create_bibfile(all_plugin_info[plugin_type], BIBS_DIR, plugin_type) # plugin type .bib file + create_bibfile(all_plugin_info, BIBS_DIR) # one .bib file to rule them all + update_readthedocs(all_plugin_info, PLUGINS_DOC, GITHUB_DIR) + + +if __name__ == '__main__': + update_docs() From 2112ab6603b64bdbe88470614e91f8954add3104 Mon Sep 17 00:00:00 2001 From: Deirdre Kelliher Date: Mon, 5 Feb 2024 16:39:51 -0500 Subject: [PATCH 08/12] BROKEN adding start to prebuild step for readthedocs --- .readthedocs.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 57a75f48..57981d8f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,9 +1,13 @@ version: 2 build: - os: "ubuntu-22.04" - tools: - python: "3.8" + os: "ubuntu-22.04" + tools: + python: "3.8" + jobs: + pre_build: + - ??? 
+
 
 python:
   install:

From 4b17e6a70cedc0131c7dbac870572b4dd82c4dca Mon Sep 17 00:00:00 2001
From: Deirdre Kelliher
Date: Wed, 7 Feb 2024 14:39:37 -0500
Subject: [PATCH 09/12] Changing name of utils.xarray to prevent circular
 dependency

---
 brainscore_language/utils/{xarray.py => xarray_utils.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename brainscore_language/utils/{xarray.py => xarray_utils.py} (100%)

diff --git a/brainscore_language/utils/xarray.py b/brainscore_language/utils/xarray_utils.py
similarity index 100%
rename from brainscore_language/utils/xarray.py
rename to brainscore_language/utils/xarray_utils.py

From e5410ffdd4a997a3e7122eeca5937ee0a330a8ae Mon Sep 17 00:00:00 2001
From: Carol Jiang
Date: Thu, 8 Feb 2024 15:25:15 -0500
Subject: [PATCH 10/12] draft changes to .readthedocs.yml

---
 .readthedocs.yml                              | 3 ++-
 brainscore_language/utils/document_plugins.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 57981d8f..010b303b 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -6,7 +6,8 @@ build:
     python: "3.8"
   jobs:
     pre_build:
-      - ???
+      - cd brainscore_language/utils
+      - python document_plugins.py
 
 
 python:
diff --git a/brainscore_language/utils/document_plugins.py b/brainscore_language/utils/document_plugins.py
index bb49e8b6..1cf0bbef 100644
--- a/brainscore_language/utils/document_plugins.py
+++ b/brainscore_language/utils/document_plugins.py
@@ -12,6 +12,7 @@
 def update_docs():
     all_plugin_info = get_all_plugin_info(Path(__file__).parents[1])
+    print('running document_plugins.py')
     print(f"all_plugin_info: {all_plugin_info}")
     for plugin_type in all_plugin_info:
         create_bibfile(all_plugin_info[plugin_type], BIBS_DIR, plugin_type)  # plugin type .bib file

From 77eafa789abe062cb3ede00c91a4972820343596 Mon Sep 17 00:00:00 2001
From: Carol Jiang
Date: Fri, 9 Feb 2024 10:48:33 -0500
Subject: [PATCH 11/12] test dir change

---
 .readthedocs.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 010b303b..4d95fdbc 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -6,7 +6,10 @@ build:
     python: "3.8"
   jobs:
     pre_build:
+      - pwd
+      - cat brainscore_language/utils/document_plugins.py
       - cd brainscore_language/utils
+      - pwd
       - python document_plugins.py
 

From 6b9876eaf1bfe958312a6ba87b2aa3029bde66bf Mon Sep 17 00:00:00 2001
From: Carol Jiang
Date: Fri, 9 Feb 2024 11:47:03 -0500
Subject: [PATCH 12/12] delete cd

---
 .readthedocs.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 4d95fdbc..529b5306 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -6,11 +6,8 @@ build:
     python: "3.8"
   jobs:
     pre_build:
-      - pwd
       - cat brainscore_language/utils/document_plugins.py
-      - cd brainscore_language/utils
-      - pwd
-      - python document_plugins.py
+      - python brainscore_language/utils/document_plugins.py
 
 python:
   install: