From b14fd55b2719e2ac2e449a870edf87ba9cbd0c11 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 21 Aug 2025 13:44:19 -0400
Subject: [PATCH 1/4] Add GitHub Action to codespell main on push and PRs

---
 .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 00000000..b2316674
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From 8f4049b518cf12d80733bcd5d6463ab137a26ea2 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 21 Aug 2025 13:46:12 -0400
Subject: [PATCH 2/4] [DATALAD RUNCMD] chore: run codespell throughout fixing a
 few typos interactively

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w -i 3 -C 4 ./brainscore_language/metrics/cka/metric.py",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 brainscore_language/metrics/cka/metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_language/metrics/cka/metric.py b/brainscore_language/metrics/cka/metric.py
index 49f97cf0..c5f72152 100644
--- a/brainscore_language/metrics/cka/metric.py
+++ b/brainscore_language/metrics/cka/metric.py
@@ -19,7 +19,7 @@ def centering(K):
 
     return np.dot(np.dot(H, K), H)
     # HKH are the same with KH, KH is the first centering, H(KH) do the second time,
-    # results are the sme with one time centering
+    # results are the same with one time centering
     # return np.dot(H, K)  # KH
 
 

From 9554b78e02f76de6367b92b47002ee827958e3ed Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 21 Aug 2025 13:44:19 -0400
Subject: [PATCH 3/4] Add rudimentary codespell config

---
 pyproject.toml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 217638d0..405f672e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,3 +75,10 @@ markers = [
 "brainscore_language.data" = ["**"]
 "brainscore_language.metrics" = ["**"]
 "brainscore_language.models" = ["**"]
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.csv,*.json,data'
+check-hidden = true
+ignore-regex = '^\s*"image/\S+": ".*'
+# ignore-words-list = ''

From 76525aa8b2dae38ac38da748329d3cf67d343274 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 21 Aug 2025 13:47:50 -0400
Subject: [PATCH 4/4] [DATALAD RUNCMD] chore: run codespell throughout fixing a
 few new typos automagically

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 brainscore_language/model_helpers/container.py     | 4 ++--
 brainscore_language/model_helpers/modeling_suma.py | 2 +-
 brainscore_language/models/earley_parser/parser.py | 4 ++--
 brainscore_language/models/earley_parser/utils.py  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/brainscore_language/model_helpers/container.py b/brainscore_language/model_helpers/container.py
index 896bec9d..5eba0105 100644
--- a/brainscore_language/model_helpers/container.py
+++ b/brainscore_language/model_helpers/container.py
@@ -24,7 +24,7 @@
 
 class ContainerSubject(ArtificialSubject):
     """
-    Evaluation interface for arbitary containerized models.
+    Evaluation interface for arbitrary containerized models.
     User must install either 'Singularity' or 'Docker' to evaluate container models.
 
     To add new model, build a container with an entry point that supports the following interface:
@@ -73,7 +73,7 @@ def __init__(
         """
         :param container: Container name, e.g., "USERNAME/CONTAINER:TAG"
         :param entrypoint: Entrypoint to run inside container, e.g., "python /path/to/entrypoint.py"
-        :param identifier: Model identifer passed to entrypoint, e.g., "model_name"
+        :param identifier: Model identifier passed to entrypoint, e.g., "model_name"
         :param region_layer_mapping: Mapping from brain region to requested measure, e.g., {"language_system": "model_layer_name"}
         :param task_heads: Mapping from task to callable that takes the output of the container and returns a score, e.g., {ArtificialSubject.Task.next_word: predict_next_word_function}
         """
diff --git a/brainscore_language/model_helpers/modeling_suma.py b/brainscore_language/model_helpers/modeling_suma.py
index b4d702a4..07697e3a 100644
--- a/brainscore_language/model_helpers/modeling_suma.py
+++ b/brainscore_language/model_helpers/modeling_suma.py
@@ -1124,7 +1124,7 @@ def prepare_inputs_for_generation(
 
             # Keep only the unprocessed tokens:
             # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
-            # some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as
+            # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
             # input)
             if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
                 input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
diff --git a/brainscore_language/models/earley_parser/parser.py b/brainscore_language/models/earley_parser/parser.py
index 8c306500..35710960 100644
--- a/brainscore_language/models/earley_parser/parser.py
+++ b/brainscore_language/models/earley_parser/parser.py
@@ -221,7 +221,7 @@ def create_grammar(
         :param treebank_path: a path to a treebank corpus
         :param grammar_string: one or more file names to be parsed in the grammar. If None, all files will be parsed
         :param unk_low_frequency: if True, replaces all words that appear less than k times by <unk>
-        :param k: the <unk> replacement threshold (min number of occurances for a word to NOT be replaced by <unk>)
+        :param k: the <unk> replacement threshold (min number of occurrences for a word to NOT be replaced by <unk>)
         """
 
         # Load PTB annotations
@@ -230,7 +230,7 @@ def create_grammar(
             r".*",
         )
 
-        # First, get all productions and count the occurances of each lexical in all productions
+        # First, get all productions and count the occurrences of each lexical in all productions
         productions = []
         lexical_counts = {}
         for tree in treebank.parsed_sents(fileids):
diff --git a/brainscore_language/models/earley_parser/utils.py b/brainscore_language/models/earley_parser/utils.py
index 178f5289..100973e2 100644
--- a/brainscore_language/models/earley_parser/utils.py
+++ b/brainscore_language/models/earley_parser/utils.py
@@ -1,6 +1,6 @@
 """
 Modified rule definitions for the NLTK abstract chart rules to work with a probabilistic context-free grammar.
-Added a probabilstic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
+Added a probabilistic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
 Adapted from: https://www.nltk.org/api/nltk.parse.chart.html
 """