From b14fd55b2719e2ac2e449a870edf87ba9cbd0c11 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Aug 2025 13:44:19 -0400 Subject: [PATCH 1/4] Add github action to codespell main on push and PRs --- .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/codespell.yml diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 00000000..b2316674 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,25 @@ +# Codespell configuration is within pyproject.toml +--- +name: Codespell + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Annotate locations with typos + uses: codespell-project/codespell-problem-matcher@v1 + - name: Codespell + uses: codespell-project/actions-codespell@v2 From 8f4049b518cf12d80733bcd5d6463ab137a26ea2 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Aug 2025 13:46:12 -0400 Subject: [PATCH 2/4] [DATALAD RUNCMD] chore: run codespell throughout fixing a few typos interactively === Do not change lines below === { "chain": [], "cmd": "codespell -w -i 3 -C 4 ./brainscore_language/metrics/cka/metric.py", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- brainscore_language/metrics/cka/metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brainscore_language/metrics/cka/metric.py b/brainscore_language/metrics/cka/metric.py index 49f97cf0..c5f72152 100644 --- a/brainscore_language/metrics/cka/metric.py +++ b/brainscore_language/metrics/cka/metric.py @@ -19,7 +19,7 @@ def centering(K): return np.dot(np.dot(H, K), H) # HKH are the same with KH, KH is the first centering, H(KH) do the second time, - # results are the sme with one time centering + # results are the same with one time centering # return np.dot(H, K) # KH From 9554b78e02f76de6367b92b47002ee827958e3ed Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Aug 2025 13:44:19 -0400 Subject: [PATCH 3/4] Add rudimentary codespell config --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 217638d0..405f672e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,3 +75,10 @@ markers = [ "brainscore_language.data" = ["**"] "brainscore_language.metrics" = ["**"] "brainscore_language.models" = ["**"] + +[tool.codespell] +# Ref: https://github.com/codespell-project/codespell#using-a-config-file +skip = '.git*,*.csv,*.json,data' +check-hidden = true +ignore-regex = '^\s*"image/\S+": ".*' +# ignore-words-list = '' From 76525aa8b2dae38ac38da748329d3cf67d343274 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Aug 2025 13:47:50 -0400 Subject: [PATCH 4/4] [DATALAD RUNCMD] chore: run codespell throughout fixing a few new typos automagically === Do not change lines below === { "chain": [], "cmd": "codespell -w", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- brainscore_language/model_helpers/container.py | 4 ++-- brainscore_language/model_helpers/modeling_suma.py | 2 +- brainscore_language/models/earley_parser/parser.py | 4 ++-- brainscore_language/models/earley_parser/utils.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/brainscore_language/model_helpers/container.py b/brainscore_language/model_helpers/container.py index 896bec9d..5eba0105 100644 --- a/brainscore_language/model_helpers/container.py +++ b/brainscore_language/model_helpers/container.py @@ -24,7 +24,7 @@ class ContainerSubject(ArtificialSubject): """ - Evaluation interface for arbitary containerized models. + Evaluation interface for arbitrary containerized models. User must install either 'Singularity' or 'Docker' to evaluate container models. To add new model, build a container with an entry point that supports the following interface: @@ -73,7 +73,7 @@ def __init__( """ :param container: Container name, e.g., "USERNAME/CONTAINER:TAG" :param entrypoint: Entrypoint to run inside container, e.g., "python /path/to/entrypoint.py" - :param identifier: Model identifer passed to entrypoint, e.g., "model_name" + :param identifier: Model identifier passed to entrypoint, e.g., "model_name" :param region_layer_mapping: Mapping from brain region to requested measure, e.g., {"language_system": "model_layer_name"} :param task_heads: Mapping from task to callable that takes the output of the container and returns a score, e.g., {ArtificialSubject.Task.next_word: predict_next_word_function} """ diff --git a/brainscore_language/model_helpers/modeling_suma.py b/brainscore_language/model_helpers/modeling_suma.py index b4d702a4..07697e3a 100644 --- a/brainscore_language/model_helpers/modeling_suma.py +++ b/brainscore_language/model_helpers/modeling_suma.py @@ -1124,7 +1124,7 @@ def prepare_inputs_for_generation( # Keep only the unprocessed tokens: # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where - # some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as + # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as # input) if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] diff --git a/brainscore_language/models/earley_parser/parser.py b/brainscore_language/models/earley_parser/parser.py index 8c306500..35710960 100644 --- a/brainscore_language/models/earley_parser/parser.py +++ b/brainscore_language/models/earley_parser/parser.py @@ -221,7 +221,7 @@ def create_grammar( :param treebank_path: a path to a treebank corpus :param grammar_string: one or more file names to be parsed in the grammar. If None, all files will be parsed :param unk_low_frequency: if True, replaces all words that appear less than k times by - :param k: the replacement threshold (min number of occurances for a word to NOT be replaced by ) + :param k: the replacement threshold (min number of occurrences for a word to NOT be replaced by ) """ # Load PTB annotations @@ -230,7 +230,7 @@ def create_grammar( r".*", ) - # First, get all productions and count the occurances of each lexical in all productions + # First, get all productions and count the occurrences of each lexical in all productions productions = [] lexical_counts = {} for tree in treebank.parsed_sents(fileids): diff --git a/brainscore_language/models/earley_parser/utils.py b/brainscore_language/models/earley_parser/utils.py index 178f5289..100973e2 100644 --- a/brainscore_language/models/earley_parser/utils.py +++ b/brainscore_language/models/earley_parser/utils.py @@ -1,6 +1,6 @@ """ Modified rule definitions for the NLTK abstract chart rules to work with a probabilistic context-free grammar. -Added a probabilstic Earley chart parser by applying incremental chart parsing with the probabilistic rules. +Added a probabilistic Earley chart parser by applying incremental chart parsing with the probabilistic rules. Adapted from: https://www.nltk.org/api/nltk.parse.chart.html """