46 changes: 45 additions & 1 deletion .gitignore
@@ -1,5 +1,49 @@
# Python
*.pyc
__pycache__/
*.pyo
*.egg-info/
dist/
build/
*.egg

# Virtual environments
.venv/
venv/
env/

# MetaClaw runtime data
memory_data/skills/
records/
system_prompt_cache.json
evolution_history.jsonl
scheduler_state.json
*.pid

# RL training artifacts
wandb/
checkpoints/
*.ckpt

# MLX model cache (large downloads)
mlx_models/

# Smoke test temp files
tests/.smoke_records/

# OS junk
.DS_Store
Thumbs.db

# IDE
.vscode/
.idea/
*.swp
*~

# Secrets
.env
config.yaml

# MLX training output
mlx_metaclaw_output/
108 changes: 108 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,108 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

MetaClaw is an agent that meta-learns and evolves in the wild. It places your model behind a proxy that intercepts interactions from personal agents (OpenClaw, CoPaw, IronClaw, etc.), injects relevant skills at each turn, and meta-learns from accumulated experience. The system supports three operating modes:
- `skills_only`: Lightweight proxy with skill injection (no GPU required)
- `rl`: Skills + RL training with GRPO algorithm
- `madmax`: Skills + RL + smart scheduler (default mode)

## Key Architecture Components

- **API Server** (`api_server.py`): Main proxy server that intercepts LLM requests and injects skills
- **CLI** (`cli.py`): Command-line interface for setup, start, stop, and configuration
- **Configuration** (`config.py`, `config_store.py`): Dataclass-based config system with YAML storage
- **Skill Management** (`skill_manager.py`, `skill_evolver.py`): Handles skill retrieval and evolution
- **Claw Adapters** (`claw_adapter.py`): Integration with various personal agents (OpenClaw, CoPaw, etc.)
- **Training System** (`trainer.py`, `sdk_backend.py`): RL training with support for Tinker, MinT, and MLX backends
- **Scheduler** (`scheduler.py`): Manages RL training during idle/sleep windows to avoid interrupting active use

## MLX Backend Integration

The repository includes MLX backend support for Apple Silicon Macs, allowing local RL training without cloud GPUs. Key MLX files are located in `metaclaw/mlx_backend/`:
- `__init__.py` - Package initialization
- `data_types.py` - MLX-specific data structures
- `params.py` - MLX training parameters
- `lora.py` - LoRA implementation for MLX
- `service_client.py` - MLX implementation of the service client interface

To use the MLX backend, `metaclaw/config.py` may need the following updates:
- Add `mlx_model_path` and `mlx_output_dir` settings to MetaClawConfig class
- Update `training_backend_label()` and `training_backend_banner()` methods to handle "mlx" backend
- Add "mlx" to the backend selection list in the setup wizard
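The three patch points above can be sketched as follows. This is a minimal illustration, not the project's actual implementation: the `backend` field and the env-var fallback inside `resolved_backend_key()` are assumptions (the real method already exists upstream with its own logic).

```python
import os
from dataclasses import dataclass


@dataclass
class MetaClawConfig:
    # Existing fields elided; `backend` is an assumed stand-in for however
    # the real config stores the rl.backend choice.
    backend: str = "auto"
    # New MLX settings
    mlx_model_path: str = ""  # local path or HF repo, e.g. mlx-community/Qwen2.5-7B-4bit
    mlx_output_dir: str = "./mlx_metaclaw_output"

    def resolved_backend_key(self) -> str:
        # Simplified stand-in: honor METACLAW_RL_BACKEND, else the configured value.
        return os.environ.get("METACLAW_RL_BACKEND", self.backend)

    def training_backend_label(self) -> str:
        key = self.resolved_backend_key()
        if key == "mlx":
            return "MLX"
        return "MinT" if key == "mint" else "Tinker"

    def training_backend_banner(self) -> str:
        suffix = "local RL" if self.resolved_backend_key() == "mlx" else "cloud RL"
        return f"{self.training_backend_label()} {suffix}"
```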

## Common Commands

```bash
# One-time setup
metaclaw setup

# Start MetaClaw (default: madmax mode)
metaclaw start

# Start in background
metaclaw start --daemon

# Start with specific mode
metaclaw start --mode rl # RL mode only
metaclaw start --mode skills_only # Skills only mode

# Stop running instance
metaclaw stop

# Check status
metaclaw status

# View configuration
metaclaw config show

# Set configuration values
metaclaw config KEY VALUE

# Skill management
metaclaw skills log --n 10 # Show recent skill evolutions

# Scheduler management
metaclaw scheduler status # Show scheduler state
```

## Installation & Dependencies

```bash
# Basic installation (skills_only mode)
pip install -e .

# With RL support
pip install -e ".[rl]"

# With full setup (RL + evolution + scheduler)
pip install -e ".[rl,evolve,scheduler]"

# With MLX support (Apple Silicon)
pip install -e ".[mlx]"
```

## Configuration Structure

- Main config file: `~/.metaclaw/config.yaml`
- Skills directory: `~/.metaclaw/skills/`
- Recordings: `records/` directory
- MLX output: Configurable via `mlx_output_dir` setting

## Key Configuration Options

- `mode`: "madmax" (default), "rl", or "skills_only"
- `claw_type`: "openclaw", "copaw", "ironclaw", "nanoclaw", "nemoclaw", or "none"
- `rl.backend`: "auto", "tinker", "mint", or "mlx"
- `skills.enabled`: Enable/disable skill injection
- `rl.enabled`: Enable/disable RL training
- `scheduler.enabled`: Control meta-learning scheduler
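Put together, a `~/.metaclaw/config.yaml` using these options might look like this; the nesting is inferred from the dotted key names and is an assumption about the real schema:

```yaml
# Illustrative config — exact layout may differ from the actual schema.
mode: madmax
claw_type: openclaw
skills:
  enabled: true
rl:
  enabled: true
  backend: mlx
scheduler:
  enabled: true
```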

## Development Workflow

- Use `metaclaw setup` for initial configuration
- Develop with `metaclaw start` for immediate testing
- Monitor logs and state with `metaclaw status`
- Daemon mode runs in the background, with logs at `~/.metaclaw/metaclaw.log`
80 changes: 80 additions & 0 deletions INTEGRATION_NOTES.md
@@ -0,0 +1,80 @@
# MLX Backend Integration Notes

## Files to add
- `metaclaw/mlx_backend/__init__.py`
- `metaclaw/mlx_backend/data_types.py`
- `metaclaw/mlx_backend/params.py`
- `metaclaw/mlx_backend/lora.py`
- `metaclaw/mlx_backend/service_client.py`
- `tests/test_mlx_backend.py`

## Files to replace
- `metaclaw/sdk_backend.py` (full replacement with MLX support)

## Files to patch (small edits)

### metaclaw/setup_wizard.py
# In metaclaw/setup_wizard.py, update line ~156:
#
# BEFORE:
# ["auto", "tinker", "mint"],
#
# AFTER:
# ["auto", "tinker", "mint", "mlx"],
#
# This adds "mlx" to the interactive backend selection menu.


### metaclaw/config.py
# In metaclaw/config.py, add these fields to MetaClawConfig:
#
# # MLX backend settings
# mlx_model_path: str = "" # local path or HF repo (e.g. mlx-community/Qwen2.5-7B-4bit)
# mlx_output_dir: str = "./mlx_metaclaw_output"
#
# Update training_backend_label() around line 168:
#
# BEFORE:
# def training_backend_label(self) -> str:
# return "MinT" if self.resolved_backend_key() == "mint" else "Tinker"
#
# AFTER:
# def training_backend_label(self) -> str:
# key = self.resolved_backend_key()
# if key == "mlx":
# return "MLX"
# return "MinT" if key == "mint" else "Tinker"
#
# Update training_backend_banner() around line 171:
#
# BEFORE:
# def training_backend_banner(self) -> str:
# return f"{self.training_backend_label()} cloud RL"
#
# AFTER:
# def training_backend_banner(self) -> str:
# label = self.training_backend_label()
# suffix = "local RL" if self.resolved_backend_key() == "mlx" else "cloud RL"
# return f"{label} {suffix}"


## Optional: pyproject.toml extras

```toml
[project.optional-dependencies]
mlx = ["mlx>=0.22.0", "mlx-lm>=0.21.0", "safetensors"]
```

## Usage

```bash
# Install with MLX extras
pip install -e ".[mlx]"

# Configure
metaclaw setup # select backend → mlx

# Or via env
export METACLAW_RL_BACKEND=mlx
metaclaw start
```
73 changes: 73 additions & 0 deletions README.md
@@ -441,6 +441,79 @@ Each `ConversationSample` is tagged with a `skill_generation` version. When skil

---

## 🍎 MLX Backend (Apple Silicon Local RL)

MetaClaw supports **local RL training on Apple Silicon Macs** via the MLX backend. This enables RL training without cloud GPU instances — everything runs locally on your M-series chip.

### Quick Start

```bash
# Install with MLX extras
pip install -e ".[mlx]"

# Configure
metaclaw setup # select backend → mlx

# Or via env
export METACLAW_RL_BACKEND=mlx
metaclaw start
```

### Configuration

Add to `config.py`:
```python
# MLX backend settings
mlx_model_path: str = "" # local path or HF repo (e.g. mlx-community/Qwen2.5-7B-4bit)
mlx_output_dir: str = "./mlx_metaclaw_output"
```

In `setup_wizard.py`, add `"mlx"` to the backend selection list:
```python
# Before:
["auto", "tinker", "mint"],
# After:
["auto", "tinker", "mint", "mlx"],
```

### Requirements

- Apple Silicon Mac (M1/M2/M3/M4)
- macOS 13+
- Python 3.10+
- `mlx>=0.22.0`, `mlx-lm>=0.21.0`, `safetensors`

### Architecture

The MLX backend implements the same `ServiceClient`, `SamplingClient`, and `LoraTrainingClient` interfaces as the cloud backends, ensuring full compatibility with the MetaClaw training pipeline.
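As a rough illustration, that shared surface can be modeled as a structural protocol; the exact method set and signatures here are assumptions based on the `sample_async` call site in `api_server.py`:

```python
from typing import Any, Protocol, runtime_checkable


@runtime_checkable
class SamplingClient(Protocol):
    """Structural interface every backend's sampler satisfies."""

    async def sample_async(self, **kwargs: Any) -> Any: ...


class MLXSamplingClient:
    """Hypothetical local backend: same method shape, no cloud calls."""

    async def sample_async(self, **kwargs: Any) -> Any:
        # A real implementation would run mlx-lm generation here.
        raise NotImplementedError


# Structural check: the MLX client is a SamplingClient without inheriting from it,
# so trainer.py and api_server.py can stay backend-agnostic.
assert isinstance(MLXSamplingClient(), SamplingClient)
```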

### Backend Label

When the resolved backend is `mlx`, `training_backend_label()` in `config.py` returns "MLX" and the startup banner reads "MLX local RL", distinguishing it from "Tinker cloud RL" and "MinT cloud RL".

---

## 🙏 Acknowledgements

MetaClaw builds on top of the following open-source projects:
10 changes: 7 additions & 3 deletions metaclaw/api_server.py
@@ -1353,13 +1353,17 @@ async def _forward_to_backend(self, body: dict[str, Any]) -> dict[str, Any]:
         sampling_params = self._sdk.SamplingParams(**sp_kwargs)
 
         # Call active backend
-        response = await self._sampling_client.sample_async(
+        # include_prompt_logprobs / topk_prompt_logprobs are Tinker-specific;
+        # MLX (and potentially other local backends) don't support them.
+        sample_kwargs: dict[str, Any] = dict(
             prompt=model_input,
             num_samples=1,
             sampling_params=sampling_params,
-            include_prompt_logprobs=False,
-            topk_prompt_logprobs=0,
         )
+        if backend_key != "mlx":
+            sample_kwargs["include_prompt_logprobs"] = False
+            sample_kwargs["topk_prompt_logprobs"] = 0
+        response = await self._sampling_client.sample_async(**sample_kwargs)
 
         # Decode response tokens → text
         seq = response.sequences[0]
32 changes: 32 additions & 0 deletions metaclaw/mlx_backend/__init__.py
@@ -0,0 +1,32 @@
"""
MLX-native LoRA training backend for MetaClaw.

Provides a local, zero-cloud alternative to the Tinker and MinT backends
using Apple MLX on Apple Silicon. No API key or network required.
"""

from .data_types import (
Datum,
EncodedTextChunk,
ModelInput,
SampleResponse,
SampleSequence,
TensorData,
)
from .params import AdamParams, SamplingParams
from .service_client import SamplingClient, SaveStateResult, ServiceClient, TrainingClient

__all__ = [
"AdamParams",
"Datum",
"EncodedTextChunk",
"ModelInput",
"SampleResponse",
"SampleSequence",
"SamplingClient",
"SamplingParams",
"SaveStateResult",
"ServiceClient",
"TensorData",
"TrainingClient",
]