Merged (76 commits)
b53df86
Rename some topk_mask vars to mask
danbraunai-apollo Feb 12, 2025
0f4e7f8
Implement gating (untested)
danbraunai-apollo Feb 13, 2025
c784489
Fix grad attributions and calc_recon_mse
danbraunai-apollo Feb 13, 2025
e3c3eb0
Init gate with bias=1 and weights normal dist mean=0 std=0.2
danbraunai-apollo Feb 13, 2025
15b310c
Fix lp sparsity loss
danbraunai-apollo Feb 13, 2025
3aff69b
Add random mask loss
danbraunai-apollo Feb 13, 2025
13b8097
Use relud masks for lp sparsity loss
danbraunai-apollo Feb 13, 2025
0923c0f
Use masked_target_component_acts in calc_act_recon_mse
danbraunai-apollo Feb 13, 2025
3aceb8a
Comment out grad attribution calculation so people don't use it for now
danbraunai-apollo Feb 14, 2025
61247dc
Store gates in model class
danbraunai-apollo Feb 14, 2025
64c3a23
Remove buggy tms deprecated params replacement
danbraunai-apollo Feb 14, 2025
ed32237
Tie the gates for TMS
danbraunai-apollo Feb 14, 2025
60cc056
Plot masks
danbraunai-apollo Feb 14, 2025
bc9505c
Fix resid_mlp test (sensitive to float precision)
danbraunai-apollo Feb 14, 2025
01a03bc
Add init_from_target for tms
danbraunai-apollo Feb 14, 2025
6d6d99f
Support init_from_target for resid_mlp
danbraunai-apollo Feb 14, 2025
c303c14
Normalise lp sparsity by batch size
danbraunai-apollo Feb 14, 2025
41bd85b
Don't copy biases in init_spd_model_from_target_model
danbraunai-apollo Feb 15, 2025
befac1d
Fix resid_mlp init_from_target test
danbraunai-apollo Feb 16, 2025
e7e60a7
Add randrecon to run label
danbraunai-apollo Feb 20, 2025
3845ca3
Permute to identity for plotting mask_vals
danbraunai-apollo Feb 24, 2025
3bb654c
Remove post_relu_act_recon config arg
danbraunai-apollo Feb 27, 2025
ebee911
Remove code from global scope in plotting
danbraunai-apollo Feb 27, 2025
0b3f61d
Handle deprecated 'post_relu_act_recon' arg.
danbraunai-apollo Feb 27, 2025
931b6f3
Use mps if available
danbraunai-apollo Mar 3, 2025
19d7181
Avoid mps as it breaks tms
danbraunai-apollo Mar 3, 2025
8560f1b
Untie gates in TMS
danbraunai-apollo Mar 3, 2025
79391e9
Allow for detached inputs to gates and use target_out in random_mask_…
danbraunai-apollo Mar 4, 2025
cd23609
Add GateMLP
danbraunai-apollo Mar 5, 2025
96939c2
Remove bias_val and train_bias config args
danbraunai-apollo Mar 6, 2025
58eb606
Make calc_masked_target_component_acts einsums clearer
danbraunai-apollo Mar 6, 2025
f536743
Change bias init to 1 in GateMLP
danbraunai-apollo Mar 6, 2025
b6a35cc
Plot unpermuted As
danbraunai-apollo Mar 6, 2025
10cad29
Set in_bias in GateMLP to zeros
danbraunai-apollo Mar 6, 2025
6aa82a8
plot_mask_vals in the root plotting.py instead of in tms experiment
danbraunai-apollo Mar 6, 2025
99da31b
Plot permuted AB matrices
danbraunai-apollo Mar 6, 2025
aa453f7
Take mean over batch only for lp_sparsity_coeff
danbraunai-apollo Mar 6, 2025
f6bc57d
Fix for normalizing by batch only; sum over m dim
danbraunai-apollo Mar 6, 2025
5f216b3
Fix docs for lp sparsity loss
danbraunai-apollo Mar 6, 2025
d1b82fa
Fix return type of lp_sparsity_loss
danbraunai-apollo Mar 6, 2025
e93c5c9
Use Kaiming normal everywhere
danbraunai-apollo Mar 7, 2025
52e6d91
Fix MLP bias init
danbraunai-apollo Mar 7, 2025
244883f
Always init TMS biases to 0
danbraunai-apollo Mar 7, 2025
bddc0ed
Remove init_scale everywhere
danbraunai-apollo Mar 7, 2025
a1d40c4
Fix init_scale deprecation
danbraunai-apollo Mar 7, 2025
c71ace6
Init A and B based on norm of target weights
danbraunai-apollo Mar 7, 2025
3898599
Set Gate biases to 0
danbraunai-apollo Mar 7, 2025
5afdc92
Load env vars when running sweeps too
danbraunai-apollo Mar 17, 2025
e80f874
Add layerwise recon (#263)
danbraunai-apollo Mar 24, 2025
16992e5
Remove transformer-lens dependency
danbraunai-apollo Mar 24, 2025
7f6a94b
Use new random masks for layerwise_random_masks
danbraunai-apollo Mar 24, 2025
5c632f9
Add jaxtyping to dependencies
danbraunai-apollo Mar 24, 2025
5981df6
Add einops dependency
danbraunai-apollo Mar 24, 2025
fcff304
Use calc_recon_mse in calc_random_masks_mse_loss for consistency
danbraunai-apollo Mar 24, 2025
7ac2a42
Set bias to zero in GateMLP mlp_out
danbraunai-apollo Mar 25, 2025
037caf1
WIP: Swap components with Llama nn.Linear modules
danbraunai-apollo Apr 1, 2025
1a1dcaf
Fix nn.Linear shape and handle masked components
danbraunai-apollo Apr 3, 2025
993da44
WIP: Add lm_decomposition script
danbraunai-apollo Apr 3, 2025
fccc189
Fix module paths
danbraunai-apollo Apr 3, 2025
3fcf593
WIP: Add param_match_loss
danbraunai-apollo Apr 4, 2025
aa7cacf
Add layerwise recon losses
danbraunai-apollo Apr 8, 2025
82b505a
Add lp sparsity loss
danbraunai-apollo Apr 8, 2025
96ae954
Minor comment and config clean
danbraunai-apollo Apr 8, 2025
cb12ed1
Make components a submodule of SSModel and update model loading
danbraunai-apollo Apr 10, 2025
d3a7c76
Add SSModel.from_pretrained()
danbraunai-apollo Apr 10, 2025
1425354
WIP: Fix download with weights_only=True
danbraunai-apollo Apr 10, 2025
8ba8ca9
Merge branch 'main' into feature/lm
danbraunai-apollo Apr 14, 2025
7a23520
Calc mask l0 for lms
danbraunai-apollo Apr 14, 2025
2706112
Merge branch 'main' into feature/lm
danbraunai-apollo Apr 14, 2025
0103c0c
Fix missing GateMLP type references
danbraunai-apollo Apr 14, 2025
bcd3e09
Merge branch 'feature/lm' into feature/lm-temp
danbraunai-apollo Apr 16, 2025
60fa3cc
Update component_viz for new model format
danbraunai-apollo Apr 17, 2025
04bcbe1
Plot mean components during apd run
danbraunai-apollo Apr 17, 2025
c2bdda1
Re-organise wandb logging
danbraunai-apollo Apr 17, 2025
072085e
Add streamlit dashboard for lm (#2)
danbraunai-apollo Apr 22, 2025
04a2138
Remove unused set_nested_module_attr function
danbraunai-apollo Apr 22, 2025
24 changes: 24 additions & 0 deletions .vscode/launch.json
@@ -37,5 +37,29 @@
"PYDEVD_DISABLE_FILE_VALIDATION": "1"
}
},
{
"name": "lm",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/spd/experiments/lm/lm_decomposition.py",
"args": "${workspaceFolder}/spd/experiments/lm/lm_config.yaml",
"console": "integratedTerminal",
"justMyCode": true,
"env": {
"PYDEVD_DISABLE_FILE_VALIDATION": "1"
}
},
{
"name": "lm streamlit",
"type": "debugpy",
"request": "launch",
"module": "streamlit",
"args": [
"run",
"${workspaceFolder}/spd/experiments/lm/app.py",
"--server.port",
"2000"
]
}
]
}
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -21,6 +21,10 @@ dependencies = [
"python-dotenv",
"wandb<=0.17.7", # due to https://github.com/wandb/wandb/issues/8248
"sympy",
"streamlit",
"streamlit-antd-components",
"datasets",
"simple-stories-train"
]

[project.optional-dependencies]
18 changes: 17 additions & 1 deletion spd/configs.py
@@ -36,6 +36,20 @@ class ResidualMLPTaskConfig(BaseModel):
pretrained_model_path: ModelPath # e.g. wandb:spd-resid-mlp/runs/j9kmavzi


class LMTaskConfig(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
task_name: Literal["lm"] = "lm"
model_size: str # e.g. "1.25M"
max_seq_len: PositiveInt = 512
buffer_size: PositiveInt = 1000
dataset_name: str = "lennart-finke/SimpleStories"
train_data_split: str = "train"
eval_data_split: str = "test"
n_eval_steps: PositiveInt = 100
# List of fnmatch patterns for nn.Linear modules to decompose
target_module_patterns: list[str] = ["transformer.h.*.mlp.*_proj"]
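The `target_module_patterns` field selects which `nn.Linear` modules to decompose via fnmatch-style globs, as the comment above it notes. A minimal sketch of how such patterns match module names — the example module names here are hypothetical, in the style of a GPT-2-like transformer; the real names depend on the model being decomposed:

```python
from fnmatch import fnmatch

# Hypothetical module names of the kind nn.Module.named_modules() yields.
module_names = [
    "transformer.h.0.mlp.up_proj",
    "transformer.h.0.mlp.down_proj",
    "transformer.h.0.attn.q_proj",
    "transformer.h.1.mlp.up_proj",
]

# The default pattern from LMTaskConfig: any layer index, any MLP projection.
patterns = ["transformer.h.*.mlp.*_proj"]

# Keep the names that match at least one pattern. Note fnmatch's `*`
# matches across dots, so one pattern covers every layer index.
matched = [
    name for name in module_names
    if any(fnmatch(name, p) for p in patterns)
]
print(matched)
```

The attention projection is excluded because the pattern requires a `.mlp.` segment, so only the MLP projections in each block are selected.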


class Config(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
wandb_project: str | None = None
@@ -68,7 +82,9 @@ class Config(BaseModel):
unit_norm_matrices: bool = False
attribution_type: Literal["gradient"] = "gradient"
n_gate_hidden_neurons: PositiveInt | None = None
task_config: TMSTaskConfig | ResidualMLPTaskConfig = Field(..., discriminator="task_name")
task_config: TMSTaskConfig | ResidualMLPTaskConfig | LMTaskConfig = Field(
..., discriminator="task_name"
)

DEPRECATED_CONFIG_KEYS: ClassVar[list[str]] = []
RENAMED_CONFIG_KEYS: ClassVar[dict[str, str]] = {}