From 9a9c0b3c2d4a9443432ab07875e9104792c962fa Mon Sep 17 00:00:00 2001
From: fderuiter <127706008+fderuiter@users.noreply.github.com>
Date: Wed, 28 Jan 2026 18:47:24 +0000
Subject: [PATCH] Docs: Add diagrams and examples for RL and Morphogenesis

- Overhauled `ai/reinforcement_learning/mod.rs`:
  - Added "Reinforcement Learning" header and core concepts.
  - Added Mermaid diagram for Agent-Environment loop.
  - Added Bellman equations in LaTeX.
  - Added runnable "Quick Start" example (GridWorld solver).
- Enhanced `biology/morphogenesis.rs`:
  - Added Mermaid diagram for Reaction-Diffusion stencil.
  - Added "Quick Start" example for Turing pattern simulation.
- Updated `.jules/curator.md` with a new entry.

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
---
 .jules/curator.md                          |   7 +
 .../src/ai/reinforcement_learning/mod.rs   | 127 ++++++++++++++++++
 math_explorer/src/biology/morphogenesis.rs |  49 +++++++
 3 files changed, 183 insertions(+)

diff --git a/.jules/curator.md b/.jules/curator.md
index 87a7865..1bb1d43 100644
--- a/.jules/curator.md
+++ b/.jules/curator.md
@@ -59,3 +59,10 @@
 **Gap:** The `freesurfer` module was a "Visual Void" with no explanation of the cortical reconstruction pipeline or usage.
 **Strategy:** Overhauled `freesurfer/mod.rs` with a Mermaid pipeline diagram and a runnable Quick Start example for cortical thickness calculation.
 **Outcome:** Users can now understand the MRI processing pipeline and use the tools for surface analysis.
+
+## 2026-02-02 - Closing the Loop on RL and Turing Patterns
+**Gap:** The `ai/reinforcement_learning` module was a "Blank Page" with zero documentation, and `biology/morphogenesis` was a "Visual Void" describing reaction-diffusion without visualizing it.
+**Strategy:**
+- Overhauled `reinforcement_learning/mod.rs` with a Mermaid diagram of the Agent-Environment loop, Bellman equations, and a runnable GridWorld solver.
+- Enhanced `morphogenesis.rs` with a Mermaid diagram of the diffusion stencil and a Quick Start example for Turing patterns.
+**Outcome:** Users can now understand and simulate intelligent agents and biological patterns without deciphering raw code.
diff --git a/math_explorer/src/ai/reinforcement_learning/mod.rs b/math_explorer/src/ai/reinforcement_learning/mod.rs
index 0125955..87795c9 100644
--- a/math_explorer/src/ai/reinforcement_learning/mod.rs
+++ b/math_explorer/src/ai/reinforcement_learning/mod.rs
@@ -1,3 +1,130 @@
+//! # Reinforcement Learning
+//!
+//! This module provides a framework for defining and solving Markov Decision Processes (MDPs)
+//! using standard Reinforcement Learning algorithms.
+//!
+//! ## 🧠 Core Concepts
+//!
+//! Reinforcement Learning involves an **Agent** interacting with an **Environment** to maximize cumulative **Reward**.
+//!
+//! ```mermaid
+//! graph LR
+//!     Agent[🤖 Agent]
+//!     Env[🌍 Environment]
+//!
+//!     Agent -->|Action $a_t$| Env
+//!     Env -->|State $s_{t+1}$| Agent
+//!     Env -->|Reward $r_{t+1}$| Agent
+//!
+//!     style Agent fill:#f9f,stroke:#333,stroke-width:2px
+//!     style Env fill:#bbf,stroke:#333,stroke-width:2px
+//! ```
+//!
+//! ### Key Components
+//!
+//! 1. **State ($S$)**: A representation of the environment at a specific time.
+//! 2. **Action ($A$)**: A decision made by the agent.
+//! 3. **Policy ($\pi$)**: The agent's strategy, mapping states to actions ($\pi(a|s)$).
+//! 4. **Value Function ($V_\pi(s)$)**: The expected cumulative reward from state $s$ under policy $\pi$.
+//!
+//! ## 📐 Mathematical Foundation
+//!
+//! We rely on the **Bellman Equations** to solve for optimal policies.
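+//!
+//! For a fixed policy $\pi$, the value function defined above satisfies the **Bellman Expectation
+//! Equation**, stated here in the same notation as a stepping stone to the optimality form below:
+//! $$ V_\pi(s) = \sum_{a} \pi(a|s) \sum_{s'} P(s'|s, a) \left[ R(s, a, s') + \gamma V_\pi(s') \right] $$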
+//!
+//! **The Bellman Optimality Equation for $Q^*$:**
+//! $$ Q^*(s, a) = \sum_{s'} P(s'|s, a) \left[ R(s, a, s') + \gamma \max_{a'} Q^*(s', a') \right] $$
+//!
+//! Where:
+//! * $P(s'|s, a)$ is the transition probability.
+//! * $R(s, a, s')$ is the immediate reward.
+//! * $\gamma$ is the discount factor ($0 \le \gamma \le 1$).
+//!
+//! ## 🚀 Quick Start: Solving a Grid World
+//!
+//! Here is how to define a simple MDP and solve it using Tabular Q-Learning.
+//!
+//! ```rust
+//! use math_explorer::ai::reinforcement_learning::types::{MarkovDecisionProcess, State, Action};
+//! use math_explorer::ai::reinforcement_learning::algorithms::TabularQAgent;
+//!
+//! // 1. Define State and Action
+//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+//! struct GridState(usize); // Position 0..3
+//!
+//! #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+//! enum Move { Left, Right }
+//!
+//! impl State for GridState {}
+//! impl Action for Move {}
+//!
+//! // 2. Define the Environment (MDP)
+//! struct CorridorEnv;
+//!
+//! impl MarkovDecisionProcess for CorridorEnv {
+//!     type S = GridState;
+//!     type A = Move;
+//!
+//!     fn transition_probability(&self, next: &GridState, curr: &GridState, action: &Move) -> f64 {
+//!         let target = match (curr.0, action) {
+//!             (0, Move::Left) => 0,
+//!             (0, Move::Right) => 1,
+//!             (1, Move::Left) => 0,
+//!             (1, Move::Right) => 2,
+//!             (2, Move::Left) => 1,
+//!             (2, Move::Right) => 3, // Goal
+//!             (3, _) => 3, // Terminal
+//!             _ => curr.0,
+//!         };
+//!         if next.0 == target { 1.0 } else { 0.0 }
+//!     }
+//!
+//!     fn reward(&self, _curr: &GridState, _action: &Move, next: &GridState) -> f64 {
+//!         if next.0 == 3 { 10.0 } else { -1.0 } // Reward for goal, penalty for time
+//!     }
+//!
+//!     fn actions(&self, _state: &GridState) -> Vec<Move> {
+//!         vec![Move::Left, Move::Right]
+//!     }
+//!
+//!     fn discount_factor(&self) -> f64 { 0.9 }
+//!     fn is_terminal(&self, state: &GridState) -> bool { state.0 == 3 }
+//! }
+//!
+//! // 3. Train the Agent
+//! fn main() {
+//!     let env = CorridorEnv;
+//!     let mut agent = TabularQAgent::new(0.1, env.discount_factor(), 0.1);
+//!
+//!     // Training Loop
+//!     for _episode in 0..500 {
+//!         let mut state = GridState(0);
+//!         while !env.is_terminal(&state) {
+//!             let action = agent.select_action(&state, &env.actions(&state)).unwrap();
+//!
+//!             // Simulate transition (deterministic here)
+//!             // Note: In a real scenario, you sample next_state based on probability.
+//!             // For this doc example, we hardcode the transition logic to match the env.
+//!             let next_state = match (state.0, action) {
+//!                 (0, Move::Right) => GridState(1),
+//!                 (1, Move::Right) => GridState(2),
+//!                 (2, Move::Right) => GridState(3),
+//!                 (x, Move::Left) if x > 0 => GridState(x - 1),
+//!                 (x, _) => GridState(x),
+//!             };
+//!
+//!             let reward = env.reward(&state, &action, &next_state);
+//!
+//!             agent.update(&state, &action, reward, &next_state, &env.actions(&next_state));
+//!             state = next_state;
+//!         }
+//!     }
+//!
+//!     // 4. Verify Policy (Should go Right)
+//!     let best_action = agent.select_action(&GridState(0), &[Move::Left, Move::Right]);
+//!     assert_eq!(best_action, Some(Move::Right));
+//! }
+//! ```
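+//!
+//! ### 📝 Note: The Tabular Q-Learning Update
+//!
+//! The constructor call `TabularQAgent::new(0.1, env.discount_factor(), 0.1)` in the example above
+//! is read here as (learning rate $\alpha$, discount factor $\gamma$, exploration rate $\varepsilon$);
+//! treat that ordering as an assumption and check the `algorithms` module if in doubt. Under that
+//! reading, each `update` call is expected to perform the standard Q-Learning backup, a sample-based
+//! approximation of the Bellman optimality equation:
+//! $$ Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right] $$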
+
 pub mod algorithms;
 pub mod bellman;
 pub mod types;
diff --git a/math_explorer/src/biology/morphogenesis.rs b/math_explorer/src/biology/morphogenesis.rs
index 468ba10..d7b72f5 100644
--- a/math_explorer/src/biology/morphogenesis.rs
+++ b/math_explorer/src/biology/morphogenesis.rs
@@ -3,8 +3,57 @@
 //! This module implements a Reaction-Diffusion system capable of generating Turing patterns.
 //! It uses a 1D grid to simulate the interaction between an activator ($u$) and an inhibitor ($v$).
 //!
+//! ## 🧬 Concept: Reaction-Diffusion
+//!
+//! The patterns emerge from the interplay of local reaction (activation/inhibition) and spatial diffusion.
+//!
+//! ```mermaid
+//! graph LR
+//!     Cell[Cell i]
+//!     Left[Cell i-1]
+//!     Right[Cell i+1]
+//!
+//!     Left -->|Diffusion| Cell
+//!     Right -->|Diffusion| Cell
+//!     Cell -->|Reaction| Cell
+//!
+//!     subgraph Inside Cell
+//!         u[Activator u]
+//!         v[Inhibitor v]
+//!         u -->|Activate| u
+//!         u -->|Activate| v
+//!         v -->|Inhibit| u
+//!     end
+//! ```
+//!
 //! The general equation is:
 //! $$ \frac{\partial \mathbf{u}}{\partial t} = D \nabla^2 \mathbf{u} + \mathbf{f}(\mathbf{u}) $$
+//!
+//! ## 🚀 Quick Start
+//!
+//! ```rust
+//! use math_explorer::biology::morphogenesis::TuringSystem;
+//! use math_explorer::pure_math::analysis::ode::TimeStepper;
+//!
+//! // 1. Initialize System
+//! // Size=100, Du=1.0, Dv=10.0, dx=1.0
+//! let mut system = TuringSystem::new(100, 1.0, 10.0, 1.0);
+//!
+//! // 2. Add random noise to initial state to break symmetry
+//! // (Here we just add a constant for the doc test, but normally this would be random)
+//! for x in system.u_mut().iter_mut() {
+//!     *x += 0.1;
+//! }
+//!
+//! // 3. Run Simulation
+//! let dt = 0.01;
+//! for _ in 0..100 {
+//!     system.step(dt);
+//! }
+//!
+//! // 4. Check results
+//! assert!(!system.u().is_empty());
+//! ```
 
 use crate::pure_math::analysis::ode::{OdeSystem, TimeStepper, VectorOperations};
 use std::ops::{Add, AddAssign, Mul, MulAssign};