A small reinforcement learning project where a virtual pet learns to reach a cookie and avoid a trap in a 5×5 grid. Train a Q-learning agent, visualize the learned policy as arrows, and watch a greedy demo.
cd path\to\TreatQuest
python -m venv .venv
.\.venv\Scripts\Activate.ps1
python -m pip install --upgrade pip
pip install -r requirements.txt
# train the agent and create artifacts
python -m scripts.train_q_learning
# create policy.png and print the arrow map
python -m scripts.make_policy
# watch one greedy episode (uses saved Q-table)
python -m treatquest.demo

# macOS / Linux
cd /path/to/TreatQuest
python3 -m venv .venv
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r requirements.txt
python -m scripts.train_q_learning
python -m scripts.make_policy
python -m treatquest.demo

- saved_models/q_table.npy — learned Q-table
- plots/reward_curve.png — reward per episode with moving average
- plots/policy.png — arrow map of the greedy action per cell
- plots/reward_log.csv — per-episode log (episode, total_reward, epsilon)
- runs/<timestamp or name>/ — copy of artifacts plus run_meta.json with hyperparameters
# random agent smoke test (Phase 1)
python -m scripts.run_random
# Q-learning training (Phase 2)
python -m scripts.train_q_learning
# make the policy image and print a text arrow map (Phase 3)
python -m scripts.make_policy
# run one greedy episode using the saved Q-table (falls back to random if missing)
python -m treatquest.demo

Training
# example: longer run, fixed cookie/trap, small step penalty for shorter paths
python -m scripts.train_q_learning \
--episodes 2000 --max-steps 75 \
--epsilon-decay 0.997 --fixed-positions --step-penalty -0.01 \
--run-name my_experiment

Demo
python -m treatquest.demo --steps 75
python -m treatquest.demo --random   # force random even if Q-table exists

treatquest/
  __init__.py
  config.py
  env.py                # Grid world: reset(), step(), render_text()
  q_learning.py         # Q-learning agent
  policy_viz.py         # Extract best action per cell, render arrows / image
  demo.py               # Greedy episode using saved Q-table (or random fallback)
scripts/
  run_random.py         # Random episodes smoke test
  train_q_learning.py   # Training entrypoint with CLI flags
  make_policy.py        # Generates policy.png + prints arrow map
  make_release.py       # Builds a submission zip
plots/                  # reward_curve.png, policy.png, reward_log.csv
saved_models/           # q_table.npy
runs/                   # per-run copies of artifacts + run_meta.json
tests/
  test_env.py
  test_smoke.py
  test_train_short.py
ui/
  demo_gui.py