7 changes: 7 additions & 0 deletions .jules/curator.md
@@ -59,3 +59,10 @@
**Gap:** The `freesurfer` module was a "Visual Void" with no explanation of the cortical reconstruction pipeline or usage.
**Strategy:** Overhauled `freesurfer/mod.rs` with a Mermaid pipeline diagram and a runnable Quick Start example for cortical thickness calculation.
**Outcome:** Users can now understand the MRI processing pipeline and use the tools for surface analysis.

## 2026-02-02 - Closing the Loop on RL and Turing Patterns
**Gap:** The `ai/reinforcement_learning` module was a "Blank Page" with zero documentation, and `biology/morphogenesis` was a "Visual Void" describing reaction-diffusion without visualizing it.
**Strategy:**
- Overhauled `reinforcement_learning/mod.rs` with a Mermaid diagram of the Agent-Environment loop, Bellman equations, and a runnable GridWorld solver.
- Enhanced `morphogenesis.rs` with a Mermaid diagram of the diffusion stencil and a Quick Start example for Turing patterns.
**Outcome:** Users can now understand and simulate intelligent agents and biological patterns without deciphering raw code.
127 changes: 127 additions & 0 deletions math_explorer/src/ai/reinforcement_learning/mod.rs
@@ -1,3 +1,130 @@
//! # Reinforcement Learning
//!
//! This module provides a framework for defining and solving Markov Decision Processes (MDPs)
//! using standard Reinforcement Learning algorithms.
//!
//! ## 🧠 Core Concepts
//!
//! Reinforcement Learning involves an **Agent** interacting with an **Environment** to maximize cumulative **Reward**.
//!
//! ```mermaid
//! graph LR
//! Agent[🤖 Agent]
//! Env[🌍 Environment]
//!
//! Agent -->|Action $a_t$| Env
//! Env -->|State $s_{t+1}$| Agent
//! Env -->|Reward $r_{t+1}$| Agent
//!
//! style Agent fill:#f9f,stroke:#333,stroke-width:2px
//! style Env fill:#bbf,stroke:#333,stroke-width:2px
//! ```
//!
//! ### Key Components
//!
//! 1. **State ($S$)**: A representation of the environment at a specific time.
//! 2. **Action ($A$)**: A decision made by the agent.
//! 3. **Policy ($\pi$)**: The agent's strategy, mapping states to actions ($\pi(a|s)$).
//! 4. **Value Function ($V_\pi(s)$)**: The expected cumulative reward from state $s$ under policy $\pi$.
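//!
//! In expanded form, the state-value function is the expected discounted return obtained by following $\pi$:
//!
//! $$ V_\pi(s) = \mathbb{E}_\pi\!\left[ \sum_{t=0}^{\infty} \gamma^t r_{t+1} \,\middle|\, s_0 = s \right] $$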
//!
//! ## 📐 Mathematical Foundation
//!
//! We rely on the **Bellman Equations** to solve for optimal policies.
//!
//! **The Bellman Optimality Equation for $Q^*$:**
//! $$ Q^*(s, a) = \sum_{s'} P(s'|s, a) \left[ R(s, a, s') + \gamma \max_{a'} Q^*(s', a') \right] $$
//!
//! Where:
//! * $P(s'|s, a)$ is the transition probability.
//! * $R(s, a, s')$ is the immediate reward.
//! * $\gamma$ is the discount factor ($0 \le \gamma \le 1$).
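//!
//! In practice, tabular Q-learning approximates $Q^*$ with the temporal-difference update
//! $Q(s,a) \leftarrow Q(s,a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]$.
//! The following is a minimal, self-contained sketch of that update, independent of
//! `TabularQAgent` (whose internal data structures may differ):
//!
//! ```rust
//! use std::collections::HashMap;
//!
//! /// One temporal-difference update for a tabular Q-function keyed by (state, action) ids.
//! fn q_update(
//!     q: &mut HashMap<(u32, u32), f64>,
//!     s: u32, a: u32, r: f64, s_next: u32,
//!     next_actions: &[u32],
//!     alpha: f64, gamma: f64,
//! ) {
//!     // max_{a'} Q(s', a'), treating unseen entries as 0.0 and terminal states as 0.0.
//!     let max_next = next_actions
//!         .iter()
//!         .map(|a2| *q.get(&(s_next, *a2)).unwrap_or(&0.0))
//!         .fold(f64::NEG_INFINITY, f64::max);
//!     let max_next = if max_next.is_finite() { max_next } else { 0.0 };
//!
//!     let entry = q.entry((s, a)).or_insert(0.0);
//!     *entry += alpha * (r + gamma * max_next - *entry);
//! }
//!
//! let mut q = HashMap::new();
//! q_update(&mut q, 0, 1, -1.0, 1, &[0, 1], 0.1, 0.9);
//! assert!((q[&(0, 1)] + 0.1).abs() < 1e-12); // single update from Q = 0 gives -0.1
//! ```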
//!
//! ## 🚀 Quick Start: Solving a Grid World
//!
//! Here is how to define a simple MDP and solve it using Tabular Q-Learning.
//!
//! ```rust
//! use math_explorer::ai::reinforcement_learning::types::{MarkovDecisionProcess, State, Action};
//! use math_explorer::ai::reinforcement_learning::algorithms::TabularQAgent;
//!
//! // 1. Define State and Action
//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
//! struct GridState(usize); // Position 0..3
//!
//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
//! enum Move { Left, Right }
//!
//! impl State for GridState {}
//! impl Action for Move {}
//!
//! // 2. Define the Environment (MDP)
//! struct CorridorEnv;
//!
//! impl MarkovDecisionProcess for CorridorEnv {
//! type S = GridState;
//! type A = Move;
//!
//! fn transition_probability(&self, next: &GridState, curr: &GridState, action: &Move) -> f64 {
//! let target = match (curr.0, action) {
//! (0, Move::Left) => 0,
//! (0, Move::Right) => 1,
//! (1, Move::Left) => 0,
//! (1, Move::Right) => 2,
//! (2, Move::Left) => 1,
//! (2, Move::Right) => 3, // Goal
//! (3, _) => 3, // Terminal
//! _ => curr.0,
//! };
//! if next.0 == target { 1.0 } else { 0.0 }
//! }
//!
//! fn reward(&self, _curr: &GridState, _action: &Move, next: &GridState) -> f64 {
//! if next.0 == 3 { 10.0 } else { -1.0 } // Reward for goal, penalty for time
//! }
//!
//! fn actions(&self, _state: &GridState) -> Vec<Move> {
//! vec![Move::Left, Move::Right]
//! }
//!
//! fn discount_factor(&self) -> f64 { 0.9 }
//! fn is_terminal(&self, state: &GridState) -> bool { state.0 == 3 }
//! }
//!
//! // 3. Train the Agent
//! fn main() {
//! let env = CorridorEnv;
//! let mut agent = TabularQAgent::new(0.1, env.discount_factor(), 0.1);
//!
//! // Training Loop
//! for _episode in 0..500 {
//! let mut state = GridState(0);
//! while !env.is_terminal(&state) {
//! let action = agent.select_action(&state, &env.actions(&state)).unwrap();
//!
//!             // Simulate the transition (deterministic here).
//!             // In a real scenario you would sample the next state from the transition
//!             // probabilities; for this doc example the logic is hardcoded to match the env.
//! let next_state = match (state.0, action) {
//! (0, Move::Right) => GridState(1),
//! (1, Move::Right) => GridState(2),
//! (2, Move::Right) => GridState(3),
//! (x, Move::Left) if x > 0 => GridState(x - 1),
//! (x, _) => GridState(x),
//! };
//!
//! let reward = env.reward(&state, &action, &next_state);
//!
//! agent.update(&state, &action, reward, &next_state, &env.actions(&next_state));
//! state = next_state;
//! }
//! }
//!
//! // 4. Verify Policy (Should go Right)
//! let best_action = agent.select_action(&GridState(0), &[Move::Left, Move::Right]);
//! assert_eq!(best_action, Some(Move::Right));
//! }
//! ```

pub mod algorithms;
pub mod bellman;
pub mod types;
49 changes: 49 additions & 0 deletions math_explorer/src/biology/morphogenesis.rs
@@ -3,8 +3,57 @@
//! This module implements a Reaction-Diffusion system capable of generating Turing patterns.
//! It uses a 1D grid to simulate the interaction between an activator ($u$) and an inhibitor ($v$).
//!
//! ## 🧬 Concept: Reaction-Diffusion
//!
//! The patterns emerge from the interplay of local reaction (activation/inhibition) and spatial diffusion.
//!
//! ```mermaid
//! graph LR
//! Cell[Cell i]
//! Left[Cell i-1]
//! Right[Cell i+1]
//!
//! Left -->|Diffusion| Cell
//! Right -->|Diffusion| Cell
//! Cell -->|Reaction| Cell
//!
//! subgraph Inside Cell
//! u[Activator u]
//! v[Inhibitor v]
//! u -->|Activate| u
//! u -->|Activate| v
//! v -->|Inhibit| u
//! end
//! ```
//!
//! The general equation is:
//! $$ \frac{\partial \mathbf{u}}{\partial t} = D \nabla^2 \mathbf{u} + \mathbf{f}(\mathbf{u}) $$
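//!
//! The diffusion term $D \nabla^2 \mathbf{u}$ is typically discretized with a three-point
//! central-difference stencil on the 1D grid. Below is a minimal sketch of that stencil,
//! assuming zero-flux (Neumann) boundaries; `TuringSystem` may use a different boundary treatment.
//!
//! ```rust
//! /// Discrete 1D Laplacian: (u[i-1] - 2*u[i] + u[i+1]) / dx^2, with zero-flux boundaries.
//! fn laplacian_1d(u: &[f64], dx: f64) -> Vec<f64> {
//!     let n = u.len();
//!     (0..n)
//!         .map(|i| {
//!             // Mirror the edge values to enforce zero flux at the boundaries.
//!             let left = if i == 0 { u[0] } else { u[i - 1] };
//!             let right = if i == n - 1 { u[n - 1] } else { u[i + 1] };
//!             (left - 2.0 * u[i] + right) / (dx * dx)
//!         })
//!         .collect()
//! }
//!
//! // A flat field has zero Laplacian everywhere, so diffusion alone cannot create a pattern.
//! let lap = laplacian_1d(&[1.0; 5], 1.0);
//! assert!(lap.iter().all(|&x| x.abs() < 1e-12));
//! ```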
//!
//! ## 🚀 Quick Start
//!
//! ```rust
//! use math_explorer::biology::morphogenesis::TuringSystem;
//! use math_explorer::pure_math::analysis::ode::TimeStepper;
//!
//! // 1. Initialize System
//! // Size=100, Du=1.0, Dv=10.0, dx=1.0
//! let mut system = TuringSystem::new(100, 1.0, 10.0, 1.0);
//!
//! // 2. Add random noise to initial state to break symmetry
//! // (Here we just add a constant for the doc test, but normally this would be random)
//! for x in system.u_mut().iter_mut() {
//! *x += 0.1;
//! }
//!
//! // 3. Run Simulation
//! let dt = 0.01;
//! for _ in 0..100 {
//! system.step(dt);
//! }
//!
//! // 4. Check results
//! assert!(!system.u().is_empty());
//! ```

use crate::pure_math::analysis::ode::{OdeSystem, TimeStepper, VectorOperations};
use std::ops::{Add, AddAssign, Mul, MulAssign};