Hugo0 · Hugo0 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -34,8 +34,10 @@ jobs:
           uv sync
           pnpm install
 
-      - name: Check Python formatting (black)
-        run: uv run black --check webapp/ tests/
+      - name: Check Python formatting & linting (ruff)
+        run: |
+          uv run ruff check webapp/ tests/
+          uv run ruff format --check webapp/ tests/
 
       - name: Check TypeScript formatting (prettier)
         run: pnpm format:check

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.15.6
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.5.3
+    hooks:
+      - id: prettier
+        types_or: [javascript, ts, json, vue]
+        additional_dependencies:
+          - prettier@3.8.1
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -77,10 +77,11 @@ pnpm dev           # Starts both Flask server + Vite watcher
 
 ### Before Committing
 ```bash
-pnpm format            # Format TypeScript
-uv run black webapp/ tests/  # Format Python
-pnpm test              # Run TS tests
-uv run pytest tests/   # Run Python tests
+pnpm format                          # Format TypeScript
+uv run ruff format webapp/ tests/    # Format Python
+uv run ruff check webapp/ tests/     # Lint Python
+pnpm test                            # Run TS tests
+uv run pytest tests/                 # Run Python tests
 ```
 
 ## Important Notes
@@ -107,8 +108,9 @@ These are tracked by pytest but not blocking - they're data issues, not code iss
 
 ### Python
 
-- Black formatter, 100 char line length
-- Run `uv run black webapp/ tests/` before committing
+- Ruff formatter + linter, 100 char line length
+- Run `uv run ruff format webapp/ tests/` and `uv run ruff check webapp/ tests/` before committing
+- Pre-commit hooks (`.pre-commit-config.yaml`) run both automatically on commit once installed with `pre-commit install`
 
 ## Don't
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,28 +7,43 @@ dependencies = [
     "flask>=3.1.0",
     "flask-cors>=6.0.0",
     "gunicorn>=24.0.0",
-    "Flask-FlatPages>=0.9.0",
     "openai>=2.21.0",
     "pillow>=12.1.1",
 ]
 
 [dependency-groups]
 dev = [
     "pytest>=8.0.0",
-    "black>=24.0.0",
+    "ruff>=0.11.0",
     "wordfreq>=3.1.1",
 ]
 
-[tool.black]
+[tool.ruff]
 line-length = 100
-target-version = ['py314']
-include = '\.pyi?$'
-extend-exclude = '''
-/(
-    \.git
-    | \.venv
-    | venv
-    | node_modules
-    | webapp/static/dist
-)/
-'''
+target-version = "py311"
+exclude = [
+    ".git",
+    ".venv",
+    "venv",
+    "node_modules",
+    "webapp/static/dist",
+    "webapp/deprecated",
+    "tests/deprecated",
+]
+
+[tool.ruff.lint]
+select = [
+    "E",   # pycodestyle errors
+    "W",   # pycodestyle warnings
+    "F",   # pyflakes
+    "I",   # isort
+    "UP",  # pyupgrade
+    "B",   # flake8-bugbear
+    "SIM", # flake8-simplify
+]
+ignore = [
+    "E501", # line too long (formatter handles this)
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["webapp"]
diff --git a/scripts/analyze_word_quality.py b/scripts/analyze_word_quality.py
@@ -22,7 +22,6 @@
 import argparse
 import sys
 from collections import defaultdict
-from math import log
 from pathlib import Path
 
 SCRIPT_DIR = Path(__file__).parent
@@ -133,7 +132,7 @@ def cmd_char_freq(args):
     for char, freq in sorted_chars:
         count = int(freq * len(words))
         bar = "#" * int(freq * 100)
-        print(f"  {char}    {count:>6} {freq*100:>7.1f}%  {bar}")
+        print(f"  {char}    {count:>6} {freq * 100:>7.1f}%  {bar}")
 
     # Threshold analysis
     print(f"\n{'Threshold analysis':}")
@@ -144,7 +143,7 @@ def cmd_char_freq(args):
         filtered = [w for w in words if any(c in rare_chars for c in w)]
         remaining = len(words) - len(filtered)
         print(
-            f"    {threshold*100:>4.0f}%    {len(rare_chars):>6}       {len(filtered):>8}          {remaining:>6}"
+            f"    {threshold * 100:>4.0f}%    {len(rare_chars):>6}       {len(filtered):>8}          {remaining:>6}"
         )
 
 
@@ -181,7 +180,7 @@ def cmd_difficult_words(args):
     # Filter by threshold if specified
     if threshold is not None:
         scored = [(w, f, c) for w, f, c in scored if f < threshold]
-        print(f"Words in {lang} daily list with rarest character below {threshold*100:.0f}%:")
+        print(f"Words in {lang} daily list with rarest character below {threshold * 100:.0f}%:")
     else:
         print(f"All words in {lang} daily list sorted by difficulty (hardest first):")
 
@@ -191,7 +190,7 @@ def cmd_difficult_words(args):
     print(f"{'Word':<12} {'Rarest Char':>12} {'Char Freq %':>12}")
     print("-" * 38)
     for word, freq, char in scored:
-        print(f"  {word:<10} {char:>8}      {freq*100:>7.1f}%")
+        print(f"  {word:<10} {char:>8}      {freq * 100:>7.1f}%")
 
     print(f"\nTotal: {len(scored)} words")
 
@@ -267,10 +266,10 @@ def cmd_hebrew_suffixes(args):
         print(f"  → Keep: {keep}, blocklist: {to_block}")
         print()
 
-    print(f"{'='*50}")
+    print(f"{'=' * 50}")
     print(f"Total groups: {len(groups)}")
     print(f"Total words to blocklist: {total_to_blocklist}")
-    print(f"\nBlocklist additions (copy-paste ready):")
+    print("\nBlocklist additions (copy-paste ready):")
     for w in sorted(blocklist_words):
         print(w)
 
@@ -296,7 +295,7 @@ def cmd_hebrew_quality(args):
         sys.exit(1)
 
     print(f"Hebrew daily word quality analysis ({len(words)} words)")
-    print(f"Cross-referencing with wordfreq (Wikipedia, Reddit, Google Books, etc.)\n")
+    print("Cross-referencing with wordfreq (Wikipedia, Reddit, Google Books, etc.)\n")
 
     # Score each word
     not_in_wordfreq = []
@@ -313,16 +312,16 @@ def cmd_hebrew_quality(args):
             normal.append((word, zf))
 
     # Report
-    print(f"Category breakdown:")
+    print("Category breakdown:")
     print(f"  Normal (zipf >= 2.0):      {len(normal):>5} words")
     print(f"  Low frequency (zipf < 2.0): {len(low_wordfreq):>5} words")
     print(f"  Not in wordfreq at all:     {len(not_in_wordfreq):>5} words")
 
     if not_in_wordfreq:
-        print(f"\n{'='*50}")
+        print(f"\n{'=' * 50}")
         print(f"Words NOT found in wordfreq ({len(not_in_wordfreq)} words)")
-        print(f"These may be proper nouns, obscure, or malformed:")
-        print(f"{'='*50}")
+        print("These may be proper nouns, obscure, or malformed:")
+        print(f"{'=' * 50}")
         # Show first N
         limit = args.limit or 100
         for word, zf in sorted(not_in_wordfreq)[:limit]:
@@ -331,10 +330,10 @@ def cmd_hebrew_quality(args):
             print(f"  ... and {len(not_in_wordfreq) - limit} more")
 
     if low_wordfreq:
-        print(f"\n{'='*50}")
+        print(f"\n{'=' * 50}")
         print(f"Low-frequency words (zipf < 2.0, {len(low_wordfreq)} words)")
-        print(f"These may be uncommon or domain-specific:")
-        print(f"{'='*50}")
+        print("These may be uncommon or domain-specific:")
+        print(f"{'=' * 50}")
         low_wordfreq.sort(key=lambda x: x[1])
         limit = args.limit or 50
         for word, zf in low_wordfreq[:limit]:
@@ -348,7 +347,7 @@ def cmd_hebrew_quality(args):
     if has_freq:
         avg = sum(has_freq) / len(has_freq)
         print(
-            f"\nWordfreq coverage: {len(has_freq)}/{len(words)} words ({100*len(has_freq)/len(words):.1f}%)"
+            f"\nWordfreq coverage: {len(has_freq)}/{len(words)} words ({100 * len(has_freq) / len(words):.1f}%)"
         )
         print(f"Average zipf frequency (of found words): {avg:.2f}")
 

diff --git a/scripts/configs.ipynb b/scripts/configs.ipynb
@@ -48,7 +48,7 @@
     "data_dir = \"../webapp/data/\"\n",
     "language_codes = [f.split(\"/\")[-1] for f in glob.glob(f\"{data_dir}/languages/*\")]\n",
     "\n",
-    "with open(f\"{data_dir}/languages.json\", \"r\") as f:\n",
+    "with open(f\"{data_dir}/languages.json\") as f:\n",
     "    languages = json.load(f)\n",
     "\n",
     "print(languages[\"en\"])"
@@ -107,7 +107,7 @@
    ],
    "source": [
     "# load english language config\n",
-    "with open(f\"{data_dir}/languages/en/language_config.json\", \"r\") as f:\n",
+    "with open(f\"{data_dir}/languages/en/language_config.json\") as f:\n",
     "    en_config = json.load(f)\n",
     "\n",
     "f\"{data_dir}/languages/en/language_config.json\""
@@ -131,7 +131,6 @@
     "    # load from_language config\n",
     "    with open(\n",
     "        f\"{data_dir}languages/{from_language}/language_config.json\",\n",
-    "        \"r\",\n",
     "        encoding=\"utf-8\",\n",
     "    ) as f:\n",
     "        from_language_config = json.load(f)\n",
@@ -141,7 +140,6 @@
     "    if os.path.exists(f\"{data_dir}languages/{to_language}/language_config.json\"):\n",
     "        with open(\n",
     "            f\"{data_dir}languages/{to_language}/language_config.json\",\n",
-    "            \"r\",\n",
     "            encoding=\"utf-8\",\n",
     "        ) as f:\n",
     "            to_language_config = json.load(f)\n",
@@ -158,9 +156,7 @@
     "        language_config[\"meta\"] = {}\n",
     "        language_config[\"meta\"][\"locale\"] = to_language\n",
     "        language_config[\"text\"] = {}\n",
-    "        language_config[\"text\"][\"subheader\"] = languages[to_language][\n",
-    "            \"language_name_native\"\n",
-    "        ]\n",
+    "        language_config[\"text\"][\"subheader\"] = languages[to_language][\"language_name_native\"]\n",
     "\n",
     "        # defaults\n",
     "        language_config[\"language_code_3\"] = \"\"\n",
@@ -202,9 +198,7 @@
     "\n",
     "        language_config[\"help\"] = {}\n",
     "        for key in from_language_config[\"help\"]:\n",
-    "            translated_text = translate_text(\n",
-    "                from_language_config[\"help\"][key], to_language\n",
-    "            )\n",
+    "            translated_text = translate_text(from_language_config[\"help\"][key], to_language)\n",
     "            if key in [\n",
     "                \"text_2_1\",\n",
     "                \"text_2_2\",\n",
@@ -553,14 +547,13 @@
     "\n",
     "import glob\n",
     "import json\n",
-    "import os\n",
     "\n",
     "data_dir = \"../webapp/data/\"\n",
     "\n",
     "for lang in glob.glob(f\"{data_dir}languages/*\"):\n",
     "    lang = lang.split(\"/\")[-1]\n",
     "    try:\n",
-    "        with open(f\"{data_dir}/languages/{lang}/language_config.json\", \"r\") as f:\n",
+    "        with open(f\"{data_dir}/languages/{lang}/language_config.json\") as f:\n",
     "            language_config = json.load(f)\n",
     "\n",
     "        with open(f\"{data_dir}/languages/{lang}/keyboard.json\", \"w\") as f:\n",
@@ -601,7 +594,7 @@
     "for lang in glob.glob(f\"{data_dir}languages/*\"):\n",
     "    lang = lang.split(\"/\")[-1]\n",
     "    try:\n",
-    "        with open(f\"{data_dir}/languages/{lang}/{lang}_keyboard.json\", \"r\") as f:\n",
+    "        with open(f\"{data_dir}/languages/{lang}/{lang}_keyboard.json\") as f:\n",
     "            keyboard = json.load(f)\n",
     "\n",
     "        keyboard = []\n",

diff --git a/scripts/curate_words.py b/scripts/curate_words.py
@@ -20,8 +20,6 @@
 
 import argparse
 import datetime
-import os
-import shutil
 from pathlib import Path
 
 # Paths
@@ -79,7 +77,7 @@ def extract_next_words(lang: str, num_days: int = 365) -> None:
             f.write(f"{start_idx + i}: {word}\n")
 
     print(f"Wrote {num_days} words to {output_file}")
-    print(f"Review the file and identify words to remove.")
+    print("Review the file and identify words to remove.")
 
 
 def remove_words(lang: str, words_to_remove: list[str]) -> None:

diff --git a/scripts/deprecated/capture_wiktionary_fixtures.py b/scripts/deprecated/capture_wiktionary_fixtures.py
@@ -10,7 +10,6 @@
 """
 
 import json
-import os
 import sys
 import time
 import urllib.parse
@@ -21,7 +20,7 @@
 PROJECT_ROOT = Path(__file__).parent.parent
 sys.path.insert(0, str(PROJECT_ROOT / "webapp"))
 
-from wiktionary import parse_wikt_definition, WIKT_LANG_MAP
+from wiktionary import WIKT_LANG_MAP, parse_wikt_definition
 
 LANGUAGES_DIR = PROJECT_ROOT / "webapp" / "data" / "languages"
 FIXTURES_DIR = PROJECT_ROOT / "tests" / "fixtures" / "wiktionary"
@@ -32,7 +31,7 @@ def load_word_list(lang_code):
     word_file = LANGUAGES_DIR / lang_code / f"{lang_code}_5words.txt"
     if not word_file.exists():
         return []
-    with open(word_file, "r", encoding="utf-8") as f:
+    with open(word_file, encoding="utf-8") as f:
         return [line.strip() for line in f if line.strip()]
 
 

diff --git a/scripts/hunspellToJSON.py b/scripts/hunspellToJSON.py
@@ -1,5 +1,9 @@
 #!/usr/bin/python3.3
-import re, argparse, os, gzip, json
+import argparse
+import gzip
+import json
+import os
+import re
 
 
 def file_to_list(in_file):
@@ -393,15 +397,15 @@ def main():
 
         # Open AFF file
         try:
-            aff_file = open(aff_path, "r", encoding="ISO8859-1")
+            aff_file = open(aff_path, encoding="ISO8859-1")
             aff_rules = AFF(aff_file)
             aff_file.close()
-        except IOError:
+        except OSError:
             print(aff_path + " not found")
 
         # Open DIC file
         try:
-            dict_file = open(dict_path, "r", encoding="ISO8859-1")
+            dict_file = open(dict_path, encoding="ISO8859-1")
             dictionary = DICT(
                 dict_file,
                 aff_rules,
@@ -414,7 +418,6 @@ def main():
 
             # Open output file
             if args.output:
-
                 if args.gzip:
                     out_file = gzip.open(args.output, "wb")
                 else:
@@ -433,7 +436,7 @@ def main():
             out_file.close()
 
             dict_file.close()
-        except IOError:
+        except OSError:
             print(dict_path + " not found")