From fa3db99da497af9dcefa65b3ded62f35f150ff1b Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 24 Jul 2025 07:59:37 -0400 Subject: [PATCH 1/3] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- CLAUDE.md | 6 + config_root/config/test/test_config.py | 10 +- config_root/config/test/test_config_utils.py | 8 +- .../documentation/render_images.py | 6 +- .../Test_notes_to_pdf1.test3/output/test.txt | 4 +- .../output/test.txt | 24 - .../input/input1.txt | 0 .../output/test.txt | 0 .../input/input1.txt | 0 .../output/test.txt | 30 + .../input/input1.txt | 1817 +++++++++++++++++ .../output/test.txt | 1775 ++++++++++++++++ .../documentation/test/test_lint_notes.py | 10 +- .../test/test_preprocess_notes.py | 306 +-- .../documentation/test/test_render_images.py | 5 +- dev_scripts_helpers/llms/ai_review.py | 3 +- dev_scripts_helpers/llms/llm_transform.py | 1 + .../all.coding_style_guidelines.reference.md | 3 - docs/tools/all.ai_review.how_to_guide.md | 60 +- helpers/hcache_simple.py | 2 +- helpers/hllm.py | 3 +- helpers/hmarkdown.py | 1 + helpers/hmarkdown_coloring.py | 122 +- helpers/hmarkdown_rules.py | 16 +- helpers/hmarkdown_slides.py | 2 +- helpers/hmarkdown_tables.py | 120 ++ helpers/hmarkdown_toc.py | 25 + helpers/hmkdocs.py | 22 +- helpers/hplayback.py | 8 +- helpers/hunit_test.py | 4 +- helpers/hunit_test_purification.py | 48 +- helpers/test/test_hgit.py | 6 +- helpers/test/test_hmarkdown_bullets.py | 8 +- helpers/test/test_hmarkdown_coloring.py | 176 +- helpers/test/test_hmarkdown_headers.py | 4 +- helpers/test/test_hmarkdown_rules.py | 29 +- helpers/test/test_hmarkdown_tables.py | 196 ++ helpers/test/test_hmarkdown_toc.py | 101 + helpers/test/test_hmkdocs.py | 96 - helpers/test/test_hparquet.py | 4 +- helpers/test/test_hunit_test.py | 8 +- helpers/test/test_hunit_test_purification.py | 55 +- helpers/test/test_lib_tasks_docker_release.py | 6 + 
helpers/test/test_repo_config_amp.py | 8 +- helpers/unit_test_template.py | 21 - linters/test/test_amp_check_import.py | 8 +- linters/test/test_amp_check_shebang.py | 41 +- linters/test/test_amp_class_method_order.py | 268 +-- linters/test/test_amp_fix_comment.py | 3 +- unit_test_template.py | 28 +- 50 files changed, 4830 insertions(+), 677 deletions(-) delete mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/output/test.txt rename dev_scripts_helpers/documentation/test/outcomes/{Test_preprocess_notes1.test1 => Test_preprocess_notes_executable1.test1}/input/input1.txt (100%) rename dev_scripts_helpers/documentation/test/outcomes/{Test_preprocess_notes1.test1 => Test_preprocess_notes_executable1.test1}/output/test.txt (100%) rename dev_scripts_helpers/documentation/test/outcomes/{Test_preprocess_notes1.test2 => Test_preprocess_notes_executable1.test2}/input/input1.txt (100%) create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/input/input1.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/output/test.txt create mode 100644 helpers/hmarkdown_tables.py create mode 100644 helpers/hmarkdown_toc.py create mode 100644 helpers/test/test_hmarkdown_tables.py create mode 100644 helpers/test/test_hmarkdown_toc.py delete mode 100644 helpers/unit_test_template.py diff --git a/CLAUDE.md b/CLAUDE.md index 256b163a7..70a766646 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -104,3 +104,9 @@ import config_root.config.config_ as crococon ### Code Conventions - Coding guidelines are in `docs/code_guidelines/all.coding_style_guidelines.reference.md` + +- Templates for code are: + - `code_template.py`: template for code + - `unit_test_template.py`: template for unit test + - 
`dev_scripts_helpers/coding_tools/script_template.py`: template for + self-standing Python script diff --git a/config_root/config/test/test_config.py b/config_root/config/test/test_config.py index f75c0acef..cd4b35547 100644 --- a/config_root/config/test/test_config.py +++ b/config_root/config/test/test_config.py @@ -2300,13 +2300,19 @@ def execute_stmt( raise ValueError(f"Invalid mode={mode}") _LOG.debug("config=\n%s", actual) if expected is not None: - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + self.assert_equal( + actual, expected, purify_text=True, fuzzy_match=True + ) # Package the output. actual = hprint.frame(stmt) + "\n" + actual return actual def raise_stmt( - self, stmt: str, assertion_type: Any, expected: Optional[str], globals_: Dict + self, + stmt: str, + assertion_type: Any, + expected: Optional[str], + globals_: Dict, ) -> None: _LOG.debug("\n" + hprint.frame(stmt)) with self.assertRaises(assertion_type) as cm: diff --git a/config_root/config/test/test_config_utils.py b/config_root/config/test/test_config_utils.py index 65eae3c74..15fbea709 100644 --- a/config_root/config/test/test_config_utils.py +++ b/config_root/config/test/test_config_utils.py @@ -416,7 +416,9 @@ def test1(self) -> None: config1 = _get_test_config1() config2 = _get_test_config2() # - actual = cconfig.build_config_diff_dataframe({"1": config1, "2": config2}) + actual = cconfig.build_config_diff_dataframe( + {"1": config1, "2": config2} + ) actual = hpandas.df_to_str(actual, num_rows=None) # expected = pd.DataFrame( @@ -433,7 +435,9 @@ def test2(self) -> None: """ config1 = _get_test_config1() # - actual = cconfig.build_config_diff_dataframe({"1": config1, "2": config1}) + actual = cconfig.build_config_diff_dataframe( + {"1": config1, "2": config1} + ) actual = hpandas.df_to_str(actual, num_rows=None) # expected = """ diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index 
bdc4c33e0..9db0f1a5d 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -101,8 +101,10 @@ def _get_rendered_file_paths( # ############################################################################# -# Save cache to disk for persistence. -@hcacsimp.simple_cache(write_through=True) +@hcacsimp.simple_cache( + # Save cache to disk for persistence. + write_through=True +) def _render_image_code( image_code_txt: str, image_code_idx: int, diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/output/test.txt index 80fcc1126..2eff818f4 100644 --- a/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/output/test.txt +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/output/test.txt @@ -7,10 +7,10 @@ $GIT_ROOT/dev_scripts_helpers/documentation/preprocess_notes.py --input $GIT_ROO # render_images $GIT_ROOT/dev_scripts_helpers/documentation/render_images.py --in_file_name $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.preprocess_notes.txt --out_file_name $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.render_image.txt # run_pandoc -docker run --rm --user $(id -u):$(id -g) -e AM_GDRIVE_PATH -e AM_TELEGRAM_TOKEN -e CSFY_AWS_PROFILE -e CSFY_AWS_S3_BUCKET -e CSFY_ECR_BASE_PATH -e CSFY_HOST_NAME -e CSFY_HOST_OS_NAME -e CSFY_HOST_OS_VERSION -e CSFY_HOST_USER_NAME -e OPENAI_API_KEY -e OPENROUTER_API_KEY -e QUANDL_API_KEY --workdir /app --mount type=bind,source=$GIT_ROOT,target=/app tmp.pandoc_texlive.arm64.8689d816.xxxxxxxx /helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.render_image2.txt --output 
/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.tex --template /helpers_root/dev_scripts_helpers/documentation/pandoc.latex -V geometry:margin=1in -f markdown --number-sections --highlight-style=tango -s -t latex +docker run --rm --user $(id -u):$(id -g) -e AM_CONTAINER_VERSION -e CSFY_AWS_ACCESS_KEY_ID -e CSFY_AWS_DEFAULT_REGION -e CSFY_AWS_PROFILE -e CSFY_AWS_S3_BUCKET -e CSFY_AWS_SECRET_ACCESS_KEY -e CSFY_AWS_SESSION_TOKEN -e CSFY_CI -e CSFY_ECR_BASE_PATH -e CSFY_ENABLE_DIND -e CSFY_FORCE_TEST_FAIL -e CSFY_GIT_ROOT_PATH -e CSFY_HELPERS_ROOT_PATH -e CSFY_HOST_GIT_ROOT_PATH -e CSFY_HOST_NAME -e CSFY_HOST_OS_NAME -e CSFY_HOST_OS_VERSION -e CSFY_HOST_USER_NAME -e CSFY_REPO_CONFIG_CHECK -e CSFY_REPO_CONFIG_PATH -e CSFY_TELEGRAM_TOKEN -e CSFY_USE_HELPERS_AS_NESTED_MODULE -e OPENAI_API_KEY --workdir $GIT_ROOT --mount type=bind,source=$CSFY_HOST_GIT_ROOT_PATH,target=$GIT_ROOT tmp.pandoc_texlive.aarch64.xxxxxxxx $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.render_image2.txt --output $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.tex --template $GIT_ROOT/dev_scripts_helpers/documentation/pandoc.latex -V geometry:margin=1in -f markdown --number-sections --highlight-style=tango -s -t latex # latex cp -f $GIT_ROOT/dev_scripts_helpers/documentation/latex_abbrevs.sty $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch -docker run --rm --user $(id -u):$(id -g) -e AM_GDRIVE_PATH -e AM_TELEGRAM_TOKEN -e CSFY_AWS_PROFILE -e CSFY_AWS_S3_BUCKET -e CSFY_ECR_BASE_PATH -e CSFY_HOST_NAME -e CSFY_HOST_OS_NAME -e CSFY_HOST_OS_VERSION -e CSFY_HOST_USER_NAME -e OPENAI_API_KEY -e OPENROUTER_API_KEY -e QUANDL_API_KEY --workdir /app --mount type=bind,source=$GIT_ROOT,target=/app tmp.latex.arm64.2f590c86.xxxxxxxx pdflatex -output-directory 
/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch --interaction=nonstopmode --halt-on-error --shell-escape /helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.tex +docker run --rm --user $(id -u):$(id -g) -e AM_CONTAINER_VERSION -e CSFY_AWS_ACCESS_KEY_ID -e CSFY_AWS_DEFAULT_REGION -e CSFY_AWS_PROFILE -e CSFY_AWS_S3_BUCKET -e CSFY_AWS_SECRET_ACCESS_KEY -e CSFY_AWS_SESSION_TOKEN -e CSFY_CI -e CSFY_ECR_BASE_PATH -e CSFY_ENABLE_DIND -e CSFY_FORCE_TEST_FAIL -e CSFY_GIT_ROOT_PATH -e CSFY_HELPERS_ROOT_PATH -e CSFY_HOST_GIT_ROOT_PATH -e CSFY_HOST_NAME -e CSFY_HOST_OS_NAME -e CSFY_HOST_OS_VERSION -e CSFY_HOST_USER_NAME -e CSFY_REPO_CONFIG_CHECK -e CSFY_REPO_CONFIG_PATH -e CSFY_TELEGRAM_TOKEN -e CSFY_USE_HELPERS_AS_NESTED_MODULE -e OPENAI_API_KEY --workdir $GIT_ROOT --mount type=bind,source=$CSFY_HOST_GIT_ROOT_PATH,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch --interaction=nonstopmode --halt-on-error --shell-escape $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.tex # latex again \cp -af $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/tmp.notes_to_pdf.pdf $GIT_ROOT/dev_scripts_helpers/documentation/test/outcomes/Test_notes_to_pdf1.test3/tmp.scratch/output.pdf # copy_to_gdrive diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/output/test.txt deleted file mode 100644 index e67dbcfad..000000000 --- a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/output/test.txt +++ /dev/null @@ -1,24 +0,0 @@ -\let\emph\textit -\let\uline\underline -\let\ul\underline -# 14, Topics in Demand and 
Supply Analysis (p. 6, 843) - -## Introduction - -- **Economics** - - Economics is the study of: - - production - - distribution - - consumption - - Macroeconomics deals with aggregated economic quantities - - E.g., national output and national income - - Microeconomics deals with markets and decision making of individual economic - units - - E.g., consumers, businesses - - Microeconomics classifies private economic units into: - - consumers (aka households) - - consumption (= demand for goods and services) - - utility maximizing individuals (i.e., maximizing satisfaction from - present and future consumption) - - businesses (aka companies, firms) - - supply of goods and services by profit maximizing firms \ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test1/input/input1.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test1/input/input1.txt similarity index 100% rename from dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test1/input/input1.txt rename to dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test1/input/input1.txt diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test1/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test1/output/test.txt similarity index 100% rename from dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test1/output/test.txt rename to dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test1/output/test.txt diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/input/input1.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/input/input1.txt similarity index 100% rename from dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes1.test2/input/input1.txt 
rename to dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/input/input1.txt diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/output/test.txt new file mode 100644 index 000000000..fae3ebf94 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test2/output/test.txt @@ -0,0 +1,30 @@ +--- +fontsize: 10pt +--- +\let\emph\textit +\let\uline\underline +\let\ul\underline +# 14, Topics in Demand and Supply Analysis (p. 6, 843) + +## Introduction + +- **Economics** +- Economics is the study of: + - production + - distribution + - consumption + +- Macroeconomics deals with aggregated economic quantities + - E.g., national output and national income + +- Microeconomics deals with markets and decision making of individual economic + units + - E.g., consumers, businesses + +- Microeconomics classifies private economic units into: + - consumers (aka households) + - consumption (= demand for goods and services) + - utility maximizing individuals (i.e., maximizing satisfaction from + present and future consumption) + - businesses (aka companies, firms) + - supply of goods and services by profit maximizing firms diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/input/input1.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/input/input1.txt new file mode 100644 index 000000000..ef2e285d6 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/input/input1.txt @@ -0,0 +1,1817 @@ +::: columns +:::: {.column width=15%} +![](lectures_source/UMD_Logo.png) +:::: +:::: {.column width=75%} + +\vspace{0.4cm} +\begingroup \large +MSML610: Advanced Machine Learning +\endgroup +:::: +::: + +\vspace{1cm} + 
+\begingroup \Large +**$$\text{\blue{Machine Learning Techniques}}$$** +\endgroup +\vspace{1cm} + +**References**: + +- AIMA: ? + +- Hastie: ? + +// Model assessment and selection +// Hastie 7 (p. 238) + +# ############################################################################## +# Paradigms +# ############################################################################## + +* Machine Learning Paradigms with Examples (1/3) + +- **Supervised Learning** + - Learn from labeled data to predict labels for new inputs + - E.g., image classification using ResNet on ImageNet + +- **Unsupervised Learning** + - Discover hidden patterns or structure in unlabeled data + - E.g., K-means clustering for customer segmentation + +- **Reinforcement Learning** + - Learn through interaction with an environment, receiving rewards/punishments + - E.g., deep Q-Learning for playing Atari games + +- **Self-Supervised Learning** + - Generate pseudo-labels from unlabeled data to pre-train models + - E.g., BERT (Masked Language Modeling) + +- **Semi-Supervised Learning** + - Combine small labeled data with large unlabeled data to improve performance + - E.g., named entity recognition (NER) using annotated sentences with entity + tags combined with many raw text documents + +* Machine Learning Paradigms with Examples (2/3) + +- **Online Learning** + - Learn incrementally from a stream of data in real time + - E.g., online logistic regression for click-through rate prediction + +- **Multi-Task Learning** + - Train simultaneously a model to perform multiple related tasks + - E.g., learn sentiment analysis and question answering + +- **Meta-Learning** + - "Learning to learn": adapt quickly to new tasks using prior experience + - E.g., a model can be fine-tuned quickly on a new task using just a few + gradient steps + +- **Zero-Shot / Few-Shot Learning** + - Generalize to new tasks with no or few labeled examples + - E.g., GPT-4 solving tasks with zero-shot prompting + +- **Active Learning** 
+ - The model selects the most informative samples to be labeled by an oracle + (e.g., a human) + - E.g., pick samples where the model is least confident to get more examples + +* Machine Learning Paradigms with Examples (3/3) + +- **Federated Learning** + - Train models across decentralized devices without sharing raw data + - E.g., fraud detection or credit scoring across banks + +- **Evolutionary Learning** + - Optimize model structures or parameters using evolutionary algorithms + inspired by natural selection and genetics + - Gradient free, global search, discrete structures, variable length inputs + - E.g., genetic algorithms + +- **Curriculum Learning** + - Train models on easier tasks first, gradually increasing difficulty + - E.g., curriculum-based training in robotic control simulations + +- **Multi-Agent Learning** + - Multiple agents learn and interact in shared environments, often in + game-theoretic settings (e.g., competition, collaboration) + - E.g., AlphaStar to play StarCraft II + +* Supervised Learning +- Learn a function $f: X \to Y$ that maps inputs to correct outputs using + training examples $(\vx, y)$ where inputs and correct output pairs are known + - Requires labeled data for training + - Measure performance with error on a separate test set + +- **Classification**: output is a discrete label, e.g., + - `Spam` vs `Not Spam` + - Digit recognition `0`, `1`, ... + - Sentiment analysis `Pos`, `Neg`, `Neutral` + +- **Regression**: output is a continuous value, e.g., + - House prices given features like size and location + - House demand + - Stock prices + +- **Common algorithms**: + - Linear Regression + - Decision Trees + - K-nearest neighbors + - Neural Networks + - ... 
+ +* Unsupervised Learning +- Learn from data **without** labeled outputs + - Goal: discover patterns, groupings, or structure in the data + - No explicit feedback signal + - Evaluation can be qualitative + +- **Main techniques**: + - **Clustering**: Group similar examples, e.g., + - Customer segmentation + - Grouping news articles by topic without knowing the topics + - **Dimensionality Reduction**: Reduce number of variables with PCA while + preserving structure + - E.g., visualize high-dimensional data in 2D + - **Density Estimation**: Estimate probability distribution of data + - E.g., anomaly detection in server logs + - **Association Rule Learning**: Discover interesting relations between + variables + - E.g., market basket analysis (e.g., "people who buy X also buy Y") + +- **Common algorithms:** + - K-means + - PCA + - Autoencoders + +* Reinforcement Learning +- Learn by **interacting with an environment** to **maximize cumulative reward** + - Learn policy $\pi(s) \to a$ that maximizes expected reward + - Trade-off between exploration (trying new actions) and exploitation (using + known good actions) + - Environments provide clear rules and feedback (win/loss/reward) + - Often involve physical simulation or real-world interaction + +::: columns +:::: {.column width=60%} +- **Core elements:** + - Agent: Learner and decision maker + - Environment: Everything the agent interacts with + - State $s$ + - Action $a$ + - Reward $r$ + +- **Algorithms:** + - Q-learning + - Policy Gradient methods +:::: +:::: {.column width=35%} +```graphviz +digraph BayesianFlow { + splines=true; + nodesep=1.0; + ranksep=0.75; + + node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12, penwidth=1.4]; + + // Node styles + Agent [label="Agent", shape=box, fillcolor="#F4A6A6"]; + Env [label="Environment", shape=box, fillcolor="#B2E2B2"]; + + // Force ranks + //{ rank=same; Agent; Env; } + + // Edges + Agent -> Agent [label=" State", fontcolor=black, 
labeldistance=2.0]; + Agent -> Env [label=" Action", fontcolor=black, labeldistance=2.0]; + Env -> Agent [label=" Reward", fontcolor=black, labeldistance=2.0]; +} +``` +:::: +::: + +* Reinforcement Learning: Examples +- In game playing, learn strategies through trial and error + - E.g., AlphaGo mastering the game of Go +- In robotics, learn control policies for movement and manipulation +- In autonomous driving, learn safe and efficient driving behaviors +- In resource management, optimize allocation of limited resources over time + - E.g., data center cooling or CPU job scheduling +- In personalized recommendations, adapt suggestions based on user interaction + - E.g., newsfeed ranking adjusting based on user clicks +- In healthcare, optimize treatment plans over time + +# ############################################################################## +# Techniques +# ############################################################################## + +## ############################################################################# +## Machine Learning in Practice +## ############################################################################# + +* Machine Learning Flow +- **Question** + - E.g., "How can we predict house prices?" 
+- **Input data** + - E.g., historical data of house sales +- **Features** + - E.g., number of bedrooms, location, square footage +- **Algorithm** + - E.g., linear regression, decision trees +- **Parameters** + - E.g., learning rate, number of trees in a random forest +- **Evaluation** + - E.g., accuracy, precision, recall + +* Machine Learning Flow + +```graphviz[height=80%] +digraph BayesianFlow { + rankdir=LR; + splines=true; + nodesep=1.0; + ranksep=0.75; + node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12, penwidth=1.4]; + // Node styles + "Question" [fillcolor="#F4A6A6"]; + "Input data" [fillcolor="#FFD1A6"]; + "Features" [fillcolor="#B2E2B2"]; + "Algorithm" [fillcolor="#A0D6D1"]; + "Parameters" [fillcolor="#A6E7F4"]; + "Evaluation" [fillcolor="#A6C8F4"]; + // Force ranks + // Edges + "Question" -> "Input data"; + "Input data" -> "Features"; + "Features" -> "Algorithm"; + "Algorithm" -> "Parameters"; + "Parameters" -> "Evaluation"; +} +``` + +- **Not all phases are equally important!** + - Question $>$ Data $>$ Features $>$ Algorithm + +- Clarity of the question impacts project success +- Quality and relevance of data are crucial for performance +- Proper feature selection simplifies the model and improves accuracy +- Algorithm is often less important (contrary to popular belief!) + +* Question +- **Make the question concrete and precise** + - Define the problem clearly + - Specify inputs and expected outputs + - Align question with business or research objectives + - E.g.,: + - **Bad**: _"How can we improve sales?"_ + - **Good**: _"What factors most significantly impact sales of product X in + region Y during season Z?"_ + +- Formulating question is **the most important part** of the machine learning +problem + - Misunderstanding leads to: + - Solving the wrong problem + - Collecting wrong data + - ... 
+ +- _"If I were given one hour to save the planet, I would spend 59 minutes + defining the problem and one minute resolving it"_ (Albert Einstein) + +* Input Data +- Ensure **data is specific to prediction** goal + - E.g., use known movie ratings to predict unseen movie ratings from the same + population + - Training set $\approx$ test set + +- Relationship between data and prediction goal is **not always direct** + - E.g., interested in prices but predict supply and demand instead + +- Poor-quality data leads to inaccurate predictions + - _"Garbage in - garbage out"_ + +- Recognize **when data is insufficient** for valid answers + - _"Combination of data and desire for an answer does not ensure a reasonable + answer can be extracted"_ (John Tukey) + +- **More data vs better models** + - Meta-studies show difference between generic and best model is like + 5\% + - _"It's not who has the best algorithm that wins. It's who has the most + data"_ (Google researcher) + - _"Every time I fire a linguist, the performance of the speech recognizer + goes up"_ (IBM researcher in speech recognition) + +* Features +- **Features** provide high-level information about inputs + - E.g., use intensity and symmetry for scanned numbers instead of raw bit maps + +- **Characteristics of good features**: + 1. Enable data compression + 2. Retain relevant information + 3. Often created with expert knowledge + +- **Common mistakes in feature building**: + 1. Automating feature selection may lead to overfitting + - Black box predictions can be accurate but stop working anytime + - E.g., Google Flu's unclear feature-model link + 2. Ignoring data-specific quirks + - E.g., mislabeling outliers + 3. 
Unnecessarily discarding information + +* Models +- Best models are: + - **Interpretable** + - Allow users to understand and trust the model's decisions + - E.g., decision trees are appropriate in medical studies since they produce + a "reasoning" + - **Simple** + - Easier to implement and maintain + - Reduces the risk of overfitting + - **Accurate** + - Often accuracy is traded off for remaining characteristics + - E.g., accuracy vs interpretability, accuracy vs speed + - **Fast** + - To train and test + - Essential for real-time applications + - Reduces computational costs + - **Scalable** + - Can handle large datasets efficiently + - Important for growing data and user bases + - E.g., in the Netflix prize, Netflix didn't end up implementing the best + algorithm since it wasn't scalable enough + +## ############################################################################# +## How to Do Research +## ############################################################################# + +### ############################################################################ +### Simple Is Better +### ############################################################################ + +* Occam'S Razor +- _The **simplest** model that fits the data is also the **most plausible**_ + (Occam) + - Trim the model to the bare minimum necessary to explain the data + - _"An explanation of the data should be as simple as possible, but not + simpler"_ (Einstein?) 
+ - **Simple** means: + - Less likely to fit a given data by coincidence + - An unlikely event is more significant if it happens (formalized in terms + entropy) + - **Better** means better out of sample performance + +- An object is **simple** when it is one of few possible objects + - Polynomial of order 2 is simpler than a polynomial of order 17 + - There are many more polynomials of order 17 compared to order 2, although + both are infinite sets + - SVM (Support Vector Machine) characteristics: + - The separating hyperplane appears wiggly, but it is defined by a few + support vectors + - Complexity of a hypothesis $h$ + - E.g., polynomial order, MDL (describe the hypothesis in terms of bits), + Kolmogorov complexity + - Complexity of a hypothesis set $\calH$ + - E.g., VC dimension of the model + - Complexity of $h$ and $\calH$ are related by counting: if we need $l$ bits + to specify $h$, then $h$ is one of $2^l$ elements of a set $\calH$ + +* Model Soundness +- We cannot blindly accept the result of modeling + - A model should tell a story + - Always ask yourself: _"what criticisms would you give to the model if it was + presented to us for the first time?"_ + +- Benchmark models: what are the performance if the model outputs: + - Outputs always 0 or 1 + - E.g., long-only model for stock predictions + - Random results + - I.e., bootstrap of null hypothesis "there is no prediction power" + +- A perfect fit can mean nothing, e.g., + - Get 2 data points on a plane + - Fit data with a linear relationship + - It is a perfect fit + - This means nothing since: + - There is always a line between 2 points + - The data cannot falsify the hypothesis + - The model (line) is too complex for the data set (only 2 points) + +* Sampling Bias +- A model, when learning, sees the world in terms of the training data + - If data is sampled in a biased way, learning will produce a biased outcome + +- Formally: one of the few hypothesis of Hoeffding in learning theory is that + 
training and testing distributions are the same + +- Addressing sampling bias + - Weight or re-sample data to match testing distribution + - If data points have zero probability ($\Pr = 0$) in the data set, no + remedies are possible + +* Data Snooping +- **Data snooping** is the improper use of data that biases ML model results + - Common trap for practitioners + +- **Sources** of data snooping + 1. Contamination of training and test sets + 2. Multiple testing issue + 3. If data affects any learning step (e.g., feature engineering, model + selection, hyperparameter tuning), its assessment becomes optimistic + +- **Effects** of data snooping + - Models show inflated performance metrics which do not translate out of + sample + - Snooping leads to seemingly better performance: + - It is a "happy minefield" + +* "Burning the Test Set" +- Repeatedly using the same data eventually leads to "success" + - The model starts fitting to specific data quirks + - The test set should not be used for training; this leads to over-optimism + - _"If you torture the data long enough, it will confess whatever you want"_ + +- Solutions: + - Use the test set _exactly once_ + - The VC dimension applies to the overall learning model, including all + attempted models + - MDL accounts for the number of fitting attempts in overfitting measurement + - Adjust p-values for multiple experiments + +### ############################################################################ +### Research Methodology +### ############################################################################ + +* How to Achieve Out-Of-Sample Fit +- Goal: choose an hypothesis $g$ approximates the unknown target hypothesis $f$ + + $$ + g \approx f \iff E_{out}(g) \approx 0 + $$ + +- Solution: + - Achieve + 1. Good in-sample performance $E_{in}(g) \approx 0$ + 2. Good generalization $E_{out}(g) \approx E_{in}(g)$ + - Then 1. + 2. 
$\implies$ good out-of-sample performance + $E_{out}(g) \approx 0$ + +* What to Do If Out-Of-Sample Fit Is Poor? +- The model performs well in sample ($E_{in} \approx 0$) but poorly out of + sample ($E_{out} \gg E_{in}$) + +- What does it mean? + - The in-sample performance are optimistic + - The model is overfitted and fails to generalize + +- What do we do? + - Run diagnostics before embarking in long term projects + - Gain insight on what works / does not work to understand how to improve + performance + - E.g., bias-variance curves and learning curves + +- How to fix? + - It depends on the diagnostics! + 1. Training data + - Get more training data (it can take long time) $\iff$ fixes high variance + 2. Features + - Remove features $\iff$ fixes high variance + - Add more features (it can take long time) $\iff$ fixes high bias + - Add derived features (e.g., polynomial features) $\iff$ fixes high bias + 3. Regularization + - Decrease regularization amount $\lambda$ $\iff$ fixes high bias + - Increase regularization amount $\lambda$ $\iff$ fixes high variance + +* Why Using a Lot of Data? +- Several studies show that: + - Different algorithms/models have remarkably similar performance + - Increasing training set improves performance + +- Thus it holds that: + + $$ + \text{High capacity model + massive training set = good performance} + $$ + +- Using a high capacity model with many parameters (e.g., neural network) + $$ + E_{in} \approx 0 + $$ + due to low bias (and high variance) +- A massive data set helps avoid overfitting + $$ + E_{out} \approx E_{in} + $$ +- These two conditions together + + $$ + E_{out} \approx E_{in} \approx 0 \implies E_{out} \approx 0 + $$ + +* What to Do When One Has Lots of Data? +- You have $m$ = 100M examples in data set, what do you do? + +- Training on a lot of data might yield scalability issue: + - Slow + - Lots of compute + - Require work on infrastructure + - ... 
Plot the learning curves as a function of increasing
+ - Like thinking $n$ moves ahead in chess + - E.g., _"Next, we will conduct a detailed analysis on the demographics + contributing most to sales growth"_ + - Outline potential experiments or analyses to validate findings further + +* Example of Spam Filter Classification +- We use $N = 4$ words in an email to distinguish spam from non-spam emails + using logistic regression + - Words can be: `buy`, `now`, `deal`, `discount`, `` + +- How to improve the performance of this classifier? + 1. Collect more data + - E.g., honeypot project: set up fake email account and collect spam + 2. Use better features + - E.g., email routing information: spammers use unusual accounts and mask + emails as legitimate + 3. Use better features from message body + 4. Detect intentional misspellings + - Spammers use misspelled words (e.g., `w4tch` for `watch`) to confuse the + classifier + - Use stemming software + +* Right and Wrong Approach to Research + +- **Bad** + 1. It is not clear how to prioritize the different possible tasks + 2. Use gut feeling and pick one task + 3. Complete the task + 4. Re-evaluate performance + +- **Good** + 1. Build a simple algorithm + - Within 1 day + 2. Set up the performance evaluation framework + - A single number and bounds to evaluate + - Aim to improve that number + - Evaluate with cross-validation + 3. Set up diagnostic tools + - Compute learning and bias-variance curves + - Avoid premature optimization by understanding the issue before fixing it + 4. Manually review misclassified emails in the cross-validated set + - What features might help to improve performance? + - E.g., what types of emails are misclassified? 
+ +- Sometimes an approach must be tried to see if it works + - E.g., stemming software to consider certain words equivalent + +## ############################################################################# +## Pipeline Organization +## ############################################################################# + +* How Are Machine Learning Systems Organized? +- Machine learning systems are typically organized in a pipeline + 1. Break down the problem into sub-problems + 2. Solve problems one at the time + 3. Combine the solutions to the sub-problems into a solution to the initial + problem + +- The performance $p$ of the entire ML pipeline are given by: + + $$ + p_{system} = \sum_i p_i \cdot \alpha_i + $$ + + where: + - $p_i$ is the performance of each stage $p_i$ + - $\alpha_i$ is the importance of each stage + +* ML Pipeline: Example of Photo OCR System +- Goal: build systems to read text in a picture + - OCR = "Optical Character Recognition" + +- Stages of ML pipeline for OCR: + - Text detection: find areas of the picture with text + - Character segmentation: split text into boxes, one per letter + - E.g., `h e l l o` + - Character classification: classify characters, one at a time + - Spelling correction: fix errors in text using context + - E.g., `hell0` corrected to `hello` + +- Issues with text detection: + - Unknown text location and size + +- Solution + - Use a sliding window classifier + - Works as evaluating a classifier is often cheap compared to training + - Sliding window classifiers can be used for text detection and character + segmentation + +- **Text detection** + - Train a classifier to recognize letters vs non-letters + - Scan image in two directions, different sizes looking for text + - Create a map of text likelihood (e.g., heatmap) using classifier + probabilities + - Enclose text areas in boxes + - Discard boxes not fitting aspect ratio (valid text width > height) + +- **Character segmentation** + - Use sliding window classifiers to 
find "breaks" between characters + - Use a 1D sliding window for character segmentation + +* The Ideal Recipe for ML +- The ideal recipe for ML is: + + $$ + \text{low-bias algorithm + massive amount of data to train} + $$ + - Use learning curves to make sure we are taking advantage of more data + +- Always ask yourself: _"how much work is to get 10x more data than we currently + have?"_ +- Often it is not that difficult: + 1. Artificial data + - E.g., synthesize or amplify data set + 2. Collect and label by hand + - E.g., crowd sourcing like Amazon Mechanical Turk + +* OCR Pipeline: Example of Artificial Data Synthesis +- How can we increase data set size? + 1. Synthesize data set + - Use font libraries to generate large training sets + - Paste characters against random backgrounds + - Apply scaling, distortion, adding noise, etc + 2. Amplify a data set + - Start from a training set and add examples by warping/distorting existing + examples + +- Transformations and noise should be specific to the application domain + - E.g., Gaussian noise is not always appropriate + +* Ceiling Analysis for ML Pipeline +- The most valuable resource is time + - Sometimes one works on an optimization for months + - The optimization doesn't make much difference + +- **Problem**: On which part of the pipeline should time/resource be spent? + +- **Solution**: Ceiling analysis + - Technique to analyze performance of pipelines + - Have a single number representing the performance of the entire system + - E.g., accuracy for an OCR system + - For each component: + - Mock the component with a box that always gives the correct output + (=oracle) + - Leave the remaining components untouched + - Compute performance of the entire pipeline + - Understand which component is critical to performance by estimating an upper + bound for overall performance when that component improves 10\% + - Don't trust your gut feeling but measure! 
+ +## ############################################################################# +## Input Processing +## ############################################################################# + +// TODO(gp): Add something + +- Data cleaning +- Dimensionality reduction +- Feature engineering + +## ############################################################################# +## Learning Algorithms +## ############################################################################# + +### ############################################################################ +### Gradient Descent +### ############################################################################ + +* The Problem of Minimizing a Function + +::: columns +:::: {.column width=80%} +- **Goal**: minimize a function $J(\vw)$ + - E.g., in-sample error $E_{in}(\vw)$ + +- **Solutions**: + 1. Analytical solution + - Impose the gradient of $J(\vw)$ to equal 0 + - Find a closed-form solution for $\vw^*$ + 2. Numerical solution: + - Use an iterative method to update $\vw$ to reach the minimum value of + $J(\vw)$ + - E.g., gradient descent + - It works even if there is an analytical solution +:::: +:::: {.column width=20%} + ![](lectures_source/figures/Lesson6_Gradient_descent_2.png) +// TODO: Convert in Tikz or improve +:::: +::: + +* Gradient Descent: Intuition + +::: columns +:::: {.column width=80%} +- **Problem**: + - We are on a hilly surface and we want to walk down to the bottom of the hill + +- **Solution**: + - At each point: + - We look around + - We move a step in the direction where the surface is steepest + - We keep doing until we reach the bottom + +- Gradient descent + - Is a general technique for minimizing a twice-differentiable function + - Converges to + - A local minimum in general + - The global minimum if $J(\vw)$ is convex (e.g., logistic regression and + linear models) +:::: +:::: {.column width=20%} +![](lectures_source/figures/Lesson6_Gradient_descent_1.png) +// TODO: Convert in Tikz or 
improve +:::: +::: + +* Gradient descent with fixed learning rate (1/3) +// TODO: Add images from tutorial +- Consider the contour plot of a function + +- Start from a point $\vw(0)$ (random, the origin, ...) + +- At each step, move a fixed amount $\eta$ in the weight space (fixed learning + rate): + + $$ + \vw(t + 1) = \vw(t) + \eta \hat{\vvv} + $$ + + where $\hat{\vvv}$ is a unit vector + +- Pick $\hat{\vvv}$ to move to a value of $E_{in}(\vw)$ as negative as possible + + - The change for $E_{in}$ is: + \begingroup \footnotesize + \begin{align*} + \Delta E_{in} + & = E_{in}(\vw(t + 1)) - E_{in}(\vw(t)) + & \\ + & = E_{in}(\vw(t) + \eta \hat{\vvv}) - E_{in}(\vw(t)) + & \text{ (replacing the expression of $\vw(t + 1)$)} \\ + & = \eta \nabla E_{in}(\vw(t))^T \hat{\vvv} + O(\eta ^ 2) + & \text{ (using Taylor expansion)} \\ + \end{align*} + \endgroup + - Gradient descent keeps only $O(\eta)$ the term and ignores the rest + - Conjugate gradient considers up to $O(\eta^2)$ and ignores higher + infinitesimals + +* Gradient Descent with Fixed Learning Rate (2/3) + +- The minimal value of the scalar product + - Is $- \eta \|\nabla E_{in}(\vw(t))\|$, + - Happens when + $\hat{\vvv} = - \frac{\nabla E_{in}(\vw(t))}{\|\nabla E_{in}(\vw(t))\|}$ +- The change in weights is: + $$ + \Delta \vw = - \eta \frac{\nabla}{\|\nabla\|} + $$ +- It is called "gradient descent" since we descend along the gradient of the + function to optimize + +* Gradient Descent with Fixed Learning Rate (3/3) +- Each component of the weight $\vw$ is updated with the partial derivative with + respect to that coordinate: + \begin{align*} + \vw(t + 1) + &= \vw(t) - \eta \hat{v} \\ + \vw(t + 1) + &= \vw(t) - \eta \frac{\nabla E_{in}(\vw(t))}{\|\nabla E_{in}(\vw(t))\|} \\ + w_j(t + 1) + &= w_j(t) - \eta + \frac{1}{\|\nabla E_{in}(\vw(t))\|} + \frac{\partial E_{in}(\vw)}{\partial w_j} + \end{align*} + +- The update of all components should be simultaneous, i.e., computed at once + +- A step of the optimization 
when we update the solution (weights) is called + epoch + +* Gradient Descent: Stopping Criteria +- In theory, stop when $\Delta E_{in} = \vv{0}$ + - Numerically this might not occur +- In practice, stop when + - The variation of $E_{in}$ is smaller than a given threshold + $\Delta E_{in} < \theta$ + - We have reached a certain number of iterations + +- Monitoring gradient descent + - In theory, only need to compute the derivatives of the function $J(\vw)$ to + optimize + - In practice, need to monitor the algorithm progress by recomputing the cost + function $J(\vw)$ periodically to make sure it is decreasing + +* Setting $\eta$ in Gradient Descent with Fixed Learning Rate +- Consider a 1D convex function + - If $\eta$ is small: + - The linear approximation of $E_{in}$ is effective + - Many steps are needed to converge to the minimum + - If $\eta$ is large: + - The linear approximation fails (higher terms affect values) + - It "bounces around" + +![](lectures_source/figures/Lesson6_Gradient_descent_3.png) + +- Idea: vary learning rate $\eta$ during gradient descent + - Smaller learning rates may find a better minimum + - Reduce $\eta$ as a function of iterations + - Cons: this introduces an additional parameter to tune + +* Gradient Descent with Variable Learning Rate +- In gradient descent with fixed learning rate (i.e., constant change in weight + space), we use: + + $$ + \Delta \vw = - \eta \frac{\nabla J}{\|\nabla J\|} + $$ + +- To converge quickly, we want to: + - Move fast in weight space (large $\eta$) when the surface is steep (large + gradient) + - Move slow in weight space (small $\eta$) near the minimum to avoid bouncing + around (small gradient) + +- Ideally, $\eta$ should increase with the slope: $\eta \propto \|\nabla J\|$ + +- This is called gradient descent with variable learning rate: + + $$ + \Delta \vw = - \eta \nabla J + $$ + +* Feature Scaling in Gradient Descent +- Gradient descent converges faster if features are scaled to the same range + - 
Feature scaling techniques include min-max scaling and standardization + - E.g., applying standardization to a dataset can transform feature values to + have a mean of 0 and a standard deviation of 1 + +- Otherwise, different gradient components have different errors due to + numerical approximation, causing the gradient to bounce around + - Unscaled features can lead to slow and unstable convergence due to varying + magnitudes + - E.g., if one feature ranges from 1 to 1000 and another ranges from 0.01 to + 1, the large disparity can cause inefficient updates + +* Issues with Batch Gradient Descent +- Consider the case of squared error with $n$ samples + $$ + E_{in}(\vw) + = \frac{1}{n} \sum_i e(h_{\vw}(\vx_i) - y_i) + = \frac{1}{n} \sum_i (h_{\vw}(\vx_i) - y_i) ^ 2 + $$ +- The Batch Gradient Descent (BSD) requires to update each component of the + weight vector with an expression like: + $$ + \vw(t + 1) = \vw(t) - \eta \frac{\nabla E_{in}}{\|\nabla E_{in}\|} + $$ +- In terms of coordinates for squared error: + $$ + w_j(t + 1) = w_j(t) - + \eta \frac{2}{n} + \sum_{i=0}^n (h_{\vw}(\vx_i) - y_i) + \frac{\partial h_{\vw}(\vx_i)}{\partial w_j} + $$ +- With a large number of training examples (e.g., $N = 10^6$), gradient descent: + - Is computationally expensive as it requires evaluating the gradient from all + examples for a single update + - Requires storing all the data in memory + +### ############################################################################ +### Stochastic Gradient Descent +### ############################################################################ + +* Stochastic Gradient Descent +- **Idea** of Stochastic Gradient Descent (SGD) + - Update the weights only for one training example picked at random + +- **Algorithm** + - Pick one $(\vx_n, y_n)$ at a time from the available examples + - Compute $\nabla e(h(\vx_n), y_n)$ to update the weights: + $$ + \Delta \vw = -\eta \nabla e + $$ + - Update the weight considering only one random example: + 
$$ + w_j(t + 1) = w_j(t) - + \eta \frac{2}{n} + (h_{\vw}(\vx_t) - y_t) + \frac{\partial h_{\vw}(\vx_t)}{\partial w_j} + $$ + +- $\nabla e$ is a function of a random var $\vx_n$ + - The average direction of SGD is the same direction as batch version + $$ + \EE[\nabla e] + = \frac{1}{N} \sum \nabla e(h(\vx_n), y_n) + = \nabla \frac{1}{N} \sum e(h(\vx_n), y_n) + = \nabla E_{in} + $$ + +- In Stochastic Gradient Descent (SGD): + - The path in the weight space is more random + - It does not even converge but rather oscillates around the local minimum + +* Mini-Batch Gradient Descent +- Bring together characteristics of both Batch and Stochastic Gradient Descent + +- Use $b$ examples to make an update to the current weight + - $b$ represents the batch size, which is a hyperparameter you can choose + - A common choice for $b$ might be $b = 32$ or $b = 64$ + +- Mini-batch GD offers a balance between SGD noisiness and full-batch + approaches, using small, random data samples for updates + +* On-Line Learning and Gradient Descent +- Continuous stream of training examples requires updating the model + - In real-time systems, new data points arrive and the model adapts without + fully retraining + - E.g., in stock market prediction models, each transaction can dynamically + adjust model weights + - Handle variation in the dynamics of the underlying process + +- Stochastic gradient (SGD) and mini-batch descent are suitable for online + learning, updating the model one example at a time + +- Discard examples for a "compressed" model representation + - Useful for large data streams where storing every data point is impractical + - E.g., in training a language model on live chat data, older conversations + might be discarded after updates to maintain relevant patterns in the model + +* SGD vs BGD vs Mini-Batch +- To update the weights: + - BGD (batch gradient descent) uses all the training examples + - SGD (stochastic gradient descent) uses a single (random) training example + - 
Mini-batch GD uses only a subset of training examples + +\begingroup \scriptsize + +| **Aspect** | **Batch Gradient Descent** | **Stochastic Gradient Descent** | +| --------------- | ---------------------------------------- | ------------------------------------ | +| Computation | Uses all examples | One example at a time | +| Memory | Requires all examples in memory | Require less memory | +| Randomization | More likely to terminate in flat regions | Avoid local minima due to randomness | +| Regularization | No implicit regularization | Oscillations act as regularization | +| Parallelization | Can be parallelized | Less parallel-friendly | +| Online Learning | Not suitable | Suitable for online learning | + +\endgroup + +* Map-Reduce for Batch Gradient Descent +- In map-reduce we use $k$ machines to parallelize the summation (map step) and + then we send the $k$ partial sums to a single node to accumulate the result + (reduce step) +- Batch GD (and many learning algorithms) can be expressed in this map-reduce + form + +// TODO: Add a tikz picture + +* Coordinate Descend +- Minimize $J(x_0, ..., x_n)$ by optimizing along one direction $x_i$ at a time + - Instead of computing all derivatives + +- **Algorithm** + - Pick a random starting point $\vw(0)$ + - Pick a random order for the coordinates $\{ x_i \}$ + - Find the minimum along the current coordinate (1D optimization problem) + - Move to the next coordinate $x_{i+1}$ + - The sequence of $\vw(t)$ is decreasing + - A minimum is found if there is no improvement after one cycle of scanning + all coordinates + - The minimum is local + +* Gradient Descent vs Pseudo-Inverse for Linear Models +- For linear models we can use either pseudo-inverse or gradient descent to find + optimal $\vw^*$ + +- **Gradient descent** + - Choose learning rate $\eta$ + - Requires many iterations to converge + - Monitor stopping criteria, oscillations, etc + - Effective for many features $P$ + +- **Pseudo-inverse** + - No parameter 
selection needed + - Converges in one iteration (with nested loops) + - Computes $(\mX^T \mX)^{-1}$, a $P \times P$ matrix + - Inverse complexity $O(P^3)$ + - E.g., for $P \approx 10,000$, gradient descent is preferable + +## ############################################################################# +## Performance Metrics +## ############################################################################# + +* How to Make Progress in ML Research +- There are many possible directions for research + - Different features + - Different data preprocessing methods + - Different models + - Different training algorithms + - Different evaluation techniques + - Explore optimization strategies + +- What to do? + +- Approach + - Evaluate models systematically using a single number + - Implement metrics (E.g., accuracy, F1 score) for insight + - Use cross-validation for model validation + - Statistical tests to ensure differences are not random + - Utilize hypothesis testing for genuine improvements + - Conduct A/B testing for real-world validation + +* How to Measure Classifier'S Performance? 
+- Success / hit / win rate (or error / miss rate) + - Measures the proportion of correct predictions by the model + - Important for understanding overall accuracy + - E.g., in binary classification, 80 correct predictions out of 100 result in + an 80% success rate + +- Log probability / cross-entropy error + - Evaluates classification model with probabilities between 0 and 1 + - E.g., lower cross-entropy loss indicates better performance + +- **Precision / recall / F-score** + - Useful for evaluating models in imbalanced data scenarios + - Precision: ratio of correctly predicted positive observations to total + predicted positives + - E.g., a precision of 0.75 means 75% of identified positives are true + positives + - Recall: ratio of correctly predicted positive observations to actual + positives + - E.g., a recall of 0.60 means 60% of actual positives were correctly + identified + - F-score: weighted harmonic mean of precision and recall + +- **Utility function** + - Customizes the evaluation metric to prioritize types of errors and success + - E.g., true / false positives / negatives + - E.g., in medical diagnosis, a utility function might give higher weight to + minimizing false negatives to prevent missed diagnoses + +* Training vs Test Set +- Performance on train set $E_{in}$ is an optimistic estimate of $E_{out}$ + - One can have: + - 0\% error rate on training data (e.g., memorizing responses for training + set) + - 50\% error rate on test set (e.g., by answering randomly) + +- To evaluate model performance, use a test set that played no role in training + +- Training and test sets should be representative samples of the problem + - E.g., credit risk problem + - One cannot use data from a bank branch in Florida to assess a model built + with data from a bank branch in New York + - Characteristics of the populations are very different + +* Lots of Data Scenario vs Scarce Data Scenario +- **Lots of data scenario** + - Ideal to have lots of data (ideally 
infinite) + - Learn on lots of data + - Fit all degrees of freedom of a complex model + - Predict on lots of data + - Assess precise out-of-sample performance + +- **Scarce data scenario** + - Often data (especially data of high quality) is scarce + - E.g., facial recognition datasets with limited annotated data needing + careful management + - Cannot use all data as a training set + - Need to hold out data to estimate performance metrics and bounds + - Split the data 70-30 or 80-20 in train and test sets + - Consider cross-validation techniques to maximize data usage + - Other approaches: + - Augment data artificially, like data augmentation in image processing + - Utilize transfer learning with pre-trained models on related tasks + +// TODO: Add plot for the splitting of data + +* Splitting Data Into Training, Validation, Test Sets +- Training, validation, and test sets must be: + - Distinct + - Representative of the problem + - E.g., each class in all sets must be represented according to the original + data + - Sized based on available data and problem needs + +- To ensure sets have the same distribution: + - Stratified sampling + - E.g., each class label is proportionally represented in each set + - Shuffle and then sample + - Achieves randomization, maintaining distribution + - Sample and check statistics of variables (e.g., mean, std dev, PDF) + - Compare these statistics to ensure each set mirrors the broader dataset + +* Rule of Thumbs for Data Set Splits +- If $n$ is **large** $\to$ use a 60-20-20 split + - Training: 60% + - Validation: 20% + - Test: 20% + +- If $n$ is **medium** $\to$ use a 60-40 split + - Training: 60% + - Test: 40% + - Not possible to learn hyperparameters, so no validation set + +- If $n$ is **small** $\to$ use cross-validation and report "small data size" + - Use K-fold cross-validation + - Be cautious of the increased chance of high accuracy by chance + - Is machine learning for the given sample size even suitable? 
+ +* Can We Ever Use Test Set as Training Set? +- Once the model is selected and validated, reuse all available data (including + the test set) to generate the model for deployment + - This ensures the model benefits from all available information + +- Generally, more data is better, though returns diminish after exceeding a + certain volume + - Initially, increasing data size can significantly improve model performance + - Eventually, adding more data results in smaller accuracy gains and may not + justify the increased computational cost + +* In-Sample vs Out-Of-Sample Error Expressions +- We want to find a function $h$ that approximates the unknown function $f$, + $h \approx f$ over the space of inputs $\vx \in \calX$ ("script X") + +- The error is usually defined point-wise: + + $$ + e(h(\vx_i), f(\vx_i)) + $$ + - E.g., + - Squared error: $e(\vx) = (h(\vx) - f(\vx))^2$ + - 0-1 binary error: $e(\vx) = I[h(\vx) == f(\vx)]$ + - Log probability: $e(\vx) = - \log( \Pr(h(\vx) == f(\vx)) )$ + +- In-sample error is computed using all points in the training set: + + $$ + E_{in}{(h)} = \frac{1}{N} \sum_{i=1}^N e(h(\vx_i), f(\vx_i)) + $$ + +- Out-of-sample error is computed on the entire space of inputs $\calX$ + + $$ + E_{out}(h) = \EE_{\vx \in \calX}[e(h(\vx), f(\vx))] + $$ + +* Mean Squared Error (MSE) +- MSE is the average difference of squared error: + + $$ + \text{MSE} + \defeq \frac{1}{N} \sum_{i=1}^N (h(\vx_i) - f(\vx_i))^2 + $$ + - MSE measures the estimator quality, quantifying the difference between + estimated and actual values + - E.g., in a house price prediction model, MSE determines how close predicted + prices are to actual prices + +- **Cons:** + - It doesn't share the unit of measure with the output + - Distorts error interpretation; predicted and actual values are usually in + different units + - Sensitive to outliers + - A single large error can disproportionately affect the MSE + - Use median absolute deviation (MAD), median of squared error for + 
robustness against outliers + +* Root Mean Squared Error (RMSE) +- RMSE is the standard deviation of the Mean Squared Error (MSE): + + $$ + \text{RMSE} \defeq \sqrt{\text{MSE}} + = \sqrt{\frac{1}{N} \sum_{i=1}^N (h(\vx_i) - f(\vx_i))^2} + $$ + +- **Pros:** + - Same units as the output, allowing intuition of its magnitude compared to + the mean + - Facilitates comparison between different data sets or models since the + metric is normalized to the output's scale + +- **Cons:** + - Sensitive to outliers (like MSE) which can excessively affect the metric + - May not be suitable for ranking models when outliers or skewed distributions + are present + +* Median-Based Metrics +- We can use metric based on median (i.e., the 0.5 quantile of absolute error): + +- Median absolute deviation: + + $$ + \text{MAD} \defeq \text{median}_i(|h(\vx_i) - f(\vx_i)|) + $$ + +- Median squared error: + + $$ + \defeq \text{median}_i(|h(\vx_i) - f(\vx_i)|^2) + $$ + +* How to Choose an Error Measure? + +- Error measure depends on the **application** and should be **specified by the + "customer"**: + - The customer needs to define what constitutes an acceptable level of error + for their specific use case + - E.g., medical applications might have a low tolerance for errors, while a + recommendation system might have a higher tolerance + +- Otherwise, we can pick: + - A **plausible error measure**: + - E.g., squared error is commonly used when assuming Gaussian noise in the + data + - A **"friendly error" measure**: + - E.g., measures that allow for closed-form solutions simplify calculations + significantly + - Convex optimization-friendly measures ensure optimization algorithms find + the global minimum easily + +* Error Measures: Fingerprint Verification Example +- In fingerprint verification: + - Recognizing a valid fingerprint has no error + - Otherwise, it is a false positive or a false negative + +- Error weight depends on the application + - For the same problem in two set-ups, the 
error measure is the opposite + - For supermarket applications: + - False positives are minor (e.g., one more discount) + - False negatives are costly (e.g., annoyed customer, slow line) + - For CIA building access: + - False negatives are acceptable (triggers further security) + - False positives are disastrous + +### ############################################################################ +### Precision and Recall +### ############################################################################ + +* Error Metrics for Skewed Classes +- When classes are skewed (i.e., one class is very rare), accuracy can be + misleading + - Use metrics like confusion matrix, precision, and recall + +- Example: + - Train a classifier to distinguish tumors as: + - $y = 1$: malignant + - $y = 0$: benign + - Classifier's error rate is 1% (i.e., guess correctly 99% of the time) seems + good + - But only 0.5% of patients have cancer + - A trivial classifier that always outputs $y = 0$ has a 0.5% error rate! 
+ - Now a 1% error rate does not look good anymore + +* Decision Matrix ::: Columns :::: {.Column Width=60%} +- Aka confusion matrix + +- Typically $y = 1$ encodes the rare class to predict + +- Assuming actual and predicted class $\in \{0, 1\}$, we have 4 possible cases: + - $act = 1$, $pred = 1$: true positive (TP) + - $act = 0$, $pred = 0$: true negative (TN) + - $act = 1$, $pred = 0$: false negative (FN) (output $pred = 0$, but it is + wrong) + - $act = 0$, $pred = 1$: false positive (FP) (output $pred = 1$, but it is + wrong) + +- Aggregate decision matrix in precision and recall +:::: +:::: {.column width=35%} + +```tikz +% Draw matrix +\draw[thick] (0,0) rectangle (4,4); +\draw[thick] (0,2) -- (4,2); % horizontal middle +\draw[thick] (2,0) -- (2,4); % vertical middle + +% Labels for actual class +\node[rotate=90] at (-0.8,3) {act = 1}; +\node[rotate=90] at (-0.8,1) {act = 0}; + +% Labels for predicted class +\node at (1,4.3) {pred = 1}; +\node at (3,4.3) {pred = 0}; + +% Cell labels +\node at (1,3) {\textbf{TP}}; +\node at (3,3) {\textbf{FN}}; +\node at (1,1) {\textbf{FP}}; +\node at (3,1) {\textbf{TN}}; +``` +:::: +::: + +* Precision vs recall +- Assume that $y = 1$ encodes the rare event we want to detect + +- **Precision** measures how often there is a true positive _given that pred = + 1_ + + \begingroup \small + + $$ + \text{precision} + \defeq \Pr(\text{TP} | \text{pred == 1}) + = \frac{|\text{pred == 1} \land \text{act == 1}|}{|\text{pred == 1}|} + = \frac{\text{TP}}{\text{TP} + \text{FP}} + $$ + + \endgroup + +- **Recall** measures how often there is a true positive _given that act = 1_ + \begingroup \small + + $$ + \text{recall} + \defeq \Pr(\text{TP} | \text{act == 1}) + = \frac{\text{TP}}{|\text{act == 1}|} + = \frac{\text{TP}}{\text{TP} + \text{FN}} + $$ + + \endgroup + +- Both are conditional probability measuring the fraction of TP under different + circumstances: + - (Pre)cision: pred = 1 + - Rec(a)ll: act = 1 + +- Precision/recall are widely 
used in information retrieval + - E.g., a search engine: + - Returns 30 pages; only 20 are relevant $\implies$ precision = 20 / 30 = 2 + / 3 + - Fails to return another 40 relevant pages $\implies$ recall = 20 / + (40 + 20) = 20 / 60 = 1 / 3 + +* Precision / Recall in Terms of Quality / Quantity +- **Precision** + - Increasing precision means when we predict 1, we are more likely to be right + - E.g., in a spam email detection system, "precision is 90%" means 90% of + the emails marked as spam are actually spam + - A higher precision indicates fewer false positives + - Measures "quality" of prediction + +- **Recall** + - Increasing recall means we predict more instances when the outcome is 1 + - E.g., in a spam email detection system, "recall is 80%" indicates 80% of + all actual spam emails were correctly identified as spam + - A higher recall means fewer false negatives + - Measures "quantity" of prediction (coverage) + +* Precision / recall for trivial classifiers +- A classifier that outputs always the most common class 0 has: + \begin{alignat*}{3} + \text{precision} + & = 0 + & \text{(since TP = 0)} + \\ + \text{recall} + & = 0 + & \text{(since TP = 0)} + \\ + \end{alignat*} +- A classifier that outputs always the rare class 1 has: + \begin{alignat*}{3} + \text{recall} + & = 1 + & \text{(since FN = 0)} + \\ + \text{precision} + & \defeq \Pr(\text{TP} | \text{pred == 1}) + & \text{(by definition)} + \\ + &= \frac{\text{TP}}{\text{TP + FP}} + & \text{($TP + FP = n$ because} + \\ + &= \frac{\#(y = 1)}{n} + & \text{classifier always emits 1)} + \\ + &= \Pr(\text{pos}) \approx 0 + & \text{(the positive class is very rare)} + \\ + \end{alignat*} +- A trivial classifier has precision or recall close to 0 + +* Trading Off Precision and Recall +- In theory, we want to increase both precision and recall + +- In practice, modify the threshold of a probabilistic classifier to trade off + precision and recall in practice + +- E.g., use logistic regression to predict 
cancer: + - With a threshold = 0.5, the classifier has: + - Precision = $\frac{\text{TP}}{|\text{pred == 1}|}$ + - Recall = $\frac{\text{TP}}{|\text{act == 1}|}$ + - Increase the threshold $\implies$ output 1 only if more confident, i.e., + increase precision + - Decrease the threshold $\implies$ output 1 more often, decreasing the + chances of missing a possible case of cancer, i.e., increase recall + +// TODO: Pic from notebook + +* Precision-Recall: Pros / Cons +- Pros: + - Give insight on the behavior of a classifier (e.g., confusion matrix) + - Avoid mistaking a trivial classifier for a good classifier + +- Cons: + - We have two different numbers, thus it is difficult to compare classifiers + to each other + - Solutions: F-score, AUC + +* Precision-Recall Curves +- Related to (but distinct from) **ROC curves**, which plot true positive rate + vs false positive rate + +- Plot the curve on a precision-recall plane: ($y =$ precision, $1 - x =$ + recall) to show the precision vs recall trade-off for a classifier + - E.g., changing the threshold of logistic regression + +- A curve higher than another means a better classifier, since for the same + recall we can get a higher precision + - The best classifier (precision = recall = 1) is in the top-right corner + +- The precision-recall plot can have different shapes, e.g., + - Diagonal (pure luck) + - Convex up (better than luck) + - Convex down (worse than luck) + +// TODO: Pic from notebook + +* Area Under the Curve +- **AUC** is the area under the precision-recall curve + - Provides a robust metric by integrating over all thresholds + - Higher AUC indicates better performance in differentiating between classes + - AUC = 0.5 suggests no discriminative power, similar to random guessing + - AUC closer to 1.0 indicates high performance + +- **Pros**: + - Single number summarizing classifier behavior, useful for comparing + different models + - Does not require selecting a threshold for performance calculation + - Can handle imbalanced datasets effectively + +- E.g., consider a classifier for 
medical diagnosis + - The AUC helps understand how well the model distinguishes between patients + with and without a disease across all thresholds + +* F-Score +- The F-score is the harmonic mean of precision and recall: + + $$ + \text{F-score} + \defeq \frac{2}{\frac{1}{P} + \frac{1}{R}} + = 2 \frac{P \cdot R}{P + R} + $$ + +- **Interpretation:** + - Trivial classifiers: $P = 0$ or $R = 0$ => F-score = 0 + - Perfect classifiers: $P = R = 1$ => F-score = 1 + - For F-score to be large, both $P$ and $R$ must be high + +- Why not just averaging $P, R$? + - A classifier that always outputs 1 has $R = 1$ and $P = 0$ + - $\frac{P + R}{2} = \frac{1}{2}$, while we prefer a low value (ideally 0) + +## ############################################################################# +## Model Selection +## ############################################################################# + +* Model Selection Problem +- Model selection chooses the best model from a set of candidates based on + performance + - Needed when multiple hypotheses can explain the data + +- Certain parameters are fixed, while others need to be picked, e.g., + - Set of features + - E.g., selecting a subset of features from a dataset with 100 variables + - Learning algorithms + - E.g., deciding how to train a neural network + - Model types + - E.g., linear regression model vs. Support Vector Machine (SVM) + - Model complexity + - E.g., models with polynomials of degree $d < 10$ + - Values of the regularization parameter + - E.g., trying different values like 0.01, 0.1, and 1.0 + +- Evaluate model accuracy, precision, and recall +- Perform cross-validation to assess model performance +- Consider computational cost + - E.g., a simple logistic regression is faster than a complex neural network + +* Model Selection Process + +1. Split data into $D_{train}, D_{val}, D_{test}$ + - Commonly: 60\% training, 20\% validation, 20\% test + - Like splitting 80\% training between two learning phases + +2. 
Given $N$ hypotheses, learn on $D_{train}$ to get $g_1, ..., g_N$ + +3. Evaluate hypotheses on $D_{val}$ estimating errors + $E_{val}^{(1)}, ..., E_{val}^{(N)}$ + +4. Pick model $g_m$ with minimum $E_{val}^{(m)}$ + +5. Use test set $D_{test}$ to estimate fair performance of model $g_m$, i.e., + $E_{test} \approx E_{out}$ + +6. Retrain model with entire $D = D_{train} \cup D_{val} \cup D_{test}$ to get + final $g_m^{*}$ + +// TODO: Add a tikz plot + +* Model Selection as Learning +- "Picking the model with smallest $E_{val}$" is a form of learning: + - Hypothesis set: $\{g_1, ... , g_N\}$ + - Training set: $D_{val}$ + - Pick the best model $g_m$ + +- After model selection + - Experimentally $E_{val}(g_m) < E_{out}(g_m)$, i.e., $E_{val}(g_m)$ is an + (optimistically) biased estimate of $E_{out}(g_m)$ + - Theoretically: + - The penalty for model complexity with a finite set of hypotheses is + $$ + E_{out}(g_m) \le E_{val}(g_m) + O(\sqrt{\log(N) / K}) + $$ + - Use VC dimension for an infinite number of hypotheses (e.g., choice of + $\lambda$ for regularization) + +// TODO: Add a tikz plot + +## ############################################################################# +## Aggregation +## ############################################################################# + +* Ensemble Learning: Intuition +- Ensemble learning combines multiple models to improve prediction accuracy + - **Idea**: a group of weak learners can form a strong learner + +- Combine outputs of models $X_i$ to build a model $X^*$ better than any $X_i$, + with the wisdom of all + - Utilizes diversity in model predictions to improve accuracy + - Each model contributes its unique perspective, reducing overfitting + - E.g., like a panel of voting experts + +- Example: in computer vision detecting a face is a difficult task (at least + circa 2010) + - Look for different features: + - Are there eyes? + - Is there a nose? + - Are eyes and nose in the correct position? + - ... 
+ - Each feature is weak per-se, but together they become reliable + +* Ensemble Learning: Different Techniques +- **Bagging** (bootstrap + aggregation) + - Reduces variance by averaging predictions from different models + - E.g., decision trees $\to$ bagging $\to$ random forest + - Bagging creates multiple versions of a decision tree (each trained on a + random sample of data) + - Average their predictions to improve accuracy + +- **Boosting** + - Reduces bias by focusing on errors made by previous models + - Sequentially adds models, each correcting its predecessor + - E.g., `adaBoost` increases weights of incorrectly classified data points to + learn the next model + +- **Stacking** + - Uses a meta-model to combine separate models using weights + - E.g., a stacking ensemble + - Uses a logistic regression as a meta-model + - Combines the predictions of other models (e.g., decision trees, support + vector machines, and neural networks) + +* Ensemble Learning: Relation with Statistics +- **Bagging** + - Improves performance by adding randomized variants (mimicking multiple + training sets) + - Reduce variance without affecting bias + +- **Boosting** + - Use another model to learn residuals, i.e., difference between predicted and + true values + - Related to the statistical technique of "forward stagewise additive models" + +- **Stacking** + - If we have 3 independent classifiers, each with $\Pr(\text{correct}) = 0.7$ + \begin{alignat*}{2} + \Pr(\text{majority correct}) + &= \Pr(\text{at least 2 classifiers correct}) \\ + &= {3 \choose 2} 0.7^2 0.3 + 0.7^3 \\ + &= 3 \times 0.7^2 \times 0.3 + 0.7^3 \\ + &\approx 0.78 > 0.7 + \end{alignat*} + +* Ensemble learning: pros and cons +- **Pros** + - Hypothesis set $\calH$ is increased by combining hypotheses from different + models + +- **Cons** + - More computationally intensive to train and evaluate + - Loss of interpretability + - Risk of overfitting (model complexity is increased) + - Ensemble learning contradicts 
Occam's razor, which advocates simplicity + +* When Ensemble Learning Works +- Combining multiple models with ensemble learning works when models: + - Are very different from each other + - Treat a reasonable percentage of the data correctly + - E.g., one cannot do much if all classifiers have 50% accuracy + - Complement each other: they are specialists in a part of the domain where + the others don't perform well + +* How to Combine Outputs in Ensemble Learning +- **Regression** + - Weighted average of prediction + - E.g., by accuracy of each model or by a prior + +- **Classification** + - Weighted vote of predicted classes + - It needs an odd number of models to break ties + +- **Probabilistic classification** + - Weighted average of class probabilities + +- We can also learn a meta-learner (stacking) to combine multiple models + +### ############################################################################ +### Bagging +### ############################################################################ + +* Bagging +- Bagging stands for "Bootstrap AGGregation" + +- **Learning procedure** + - Several training datasets are extracted randomly by sampling with + replacement from the original dataset (i.e., bootstrap) + - Learn multiple models, one for each training set + - Combine outputs using various methods + - Result is a better model than a single model + +- **Why bagging works?** + - From the bias-variance decomposition view, combining multiple models: + - Reduces the variance component + - Without compromising the bias (bagged models are typically unbiased) + - Bagging mimics extracting more training sets (though not independent) from + the unknown distribution + +* Bagging and Instability in Learning Algorithms +- Bagging works best with different models, especially non-linear models + +- Introduce randomization in the learning algorithm intentionally + +- **Decision Trees** + - Disable pruning + - Break ties randomly when selecting the best attribute to 
split + - E.g., bagging trees results in random forests + +- **Multilayer Perceptrons** + - Use different initial weights in backpropagation to reach different local + minima + +- **Nearest Neighbor Classifier** + - Use a random subset of features + - Resampling the training set has limited impact, as it is equivalent to + changing example weights + +### ############################################################################ +### Boosting +### ############################################################################ + +* Boosting +- Boosting builds models that complement each other + - Typically use homogeneous models, i.e., parametrized models from $\calH$ + +- Strong classifiers can be built from weak classifiers + - E.g., decision stumps = decision trees with one level + +- Statistical meaning of boosting: + - Boosting implements forward stagewise additive modeling + - Use another model to learn residuals (difference between predicted and true + values) + +- Boosting does not work for linear regression: + - Combination of linear models is still a linear model + - OLS finds optimal weights in one step + - Combining linear regressions from different attributes is equivalent to a + single multiple linear regression + +* Adaboost.M1 +- Widely used for classification +- Assume examples can be weighted in the cost function used to learn + - Otherwise use resampling + +- **Learning procedure** + - Start with equal weights for all examples + - Iterate: + - Learn a classifier based on current weights for examples + - Weight the answer of each model by overall score (e.g., accuracy) or + probability + - Evaluate the ensemble + - Adjust weights for examples classified correctly/incorrectly + +### ############################################################################ +### Stacking +### ############################################################################ + +* Stacking +- Stacking learns how to combine models (not necessarily of the same type) + +- The 
problem is that with voting / averaging we don't know which model to trust +- Instead of voting or weighting we can use a meta-learner (level 1) to learn + how to pick / mix models (level 0) + +- **Learning procedure** + - Learn "level 0" models + - Learn "level 1" model using hold-out data from learning of level 0 models + (like in model selection) + - Build training data with predicted values from level 0 models + - Then learn level 1 + - Use a simple model for level 1 (e.g., linear models or trees) to avoid + overfitting + - Use probabilities from level 0, so level 1 can assess the confidence of + each model + +* Boosting vs Bagging vs Stacking + +\begingroup \scriptsize + +| **Aspect** | **Bagging** | **Boosting** | **Stacking** | +| ------------------------ | ----------------------------------- | ---------------------------------- | ---------------------------------------- | +| **Combines** | Models of the same type | Models of the same type | Models of different types | +| | | | +| **Learning** | Models trained independently | Iterative training | Models trained independently | +| **Predicting** | Uses uniform or data-driven weights | Uses learned weights from training | Uses learned weights or confidence | +| **Main Objective** | Reduce variance | Reduce bias | Improve generalization through diversity | +| **Base Learners** | Often strong learners | Often weak learners | Any model type (heterogeneous ensemble) | +| **Sensitivity to Noise** | Low | High | Medium | +| **Parallelizable** | Yes | No (sequential dependency) | Partially (base models parallelized) | +| **Meta-model** | Not used | Not used | Required | +| | | | +| **Examples** | Random Forest | AdaBoost, Gradient Boosting | Stacked Generalization, Blending | + +\endgroup diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/output/test.txt new file 
mode 100644 index 000000000..c9faea410 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_executable1.test3/output/test.txt @@ -0,0 +1,1775 @@ +--- +fontsize: 10pt +--- +\let\emph\textit +\let\uline\underline +\let\ul\underline +::: columns +:::: {.column width=15%} +![](lectures_source/UMD_Logo.png) +:::: +:::: {.column width=75%} + +\vspace{0.4cm} +\begingroup \large +MSML610: Advanced Machine Learning +\endgroup +:::: +::: + +\vspace{1cm} + +\begingroup \Large +**$$\text{\textcolor{blue}{\text{Machine Learning Techniques}}}$$** +\endgroup +\vspace{1cm} + +**References**: + +- AIMA: ? + +- Hastie: ? + + +# Paradigms + +- **Machine Learning Paradigms with Examples (1/3)** + +- **Supervised Learning** + - Learn from labeled data to predict labels for new inputs + - E.g., image classification using ResNet on ImageNet + +- **Unsupervised Learning** + - Discover hidden patterns or structure in unlabeled data + - E.g., K-means clustering for customer segmentation + +- **Reinforcement Learning** + - Learn through interaction with an environment, receiving rewards/punishments + - E.g., deep Q-Learning for playing Atari games + +- **Self-Supervised Learning** + - Generate pseudo-labels from unlabeled data to pre-train models + - E.g., BERT (Masked Language Modeling) + +- **Semi-Supervised Learning** + - Combine small labeled data with large unlabeled data to improve performance + - E.g., named entity recognition (NER) using annotated sentences with entity + tags combined with many raw text documents + +- **Machine Learning Paradigms with Examples (2/3)** + +- **Online Learning** + - Learn incrementally from a stream of data in real time + - E.g., online logistic regression for click-through rate prediction + +- **Multi-Task Learning** + - Train simultaneously a model to perform multiple related tasks + - E.g., learn sentiment analysis and question answering + +- **Meta-Learning** + - "Learning to learn": adapt quickly to new tasks 
using prior experience + - E.g., a model can be fine-tuned quickly on a new task using just a few + gradient steps + +- **Zero-Shot / Few-Shot Learning** + - Generalize to new tasks with no or few labeled examples + - E.g., GPT-4 solving tasks with zero-shot prompting + +- **Active Learning** + - The model selects the most informative samples to be labeled by an oracle + (e.g., a human) + - E.g., pick samples where the model is least confident to get more examples + +- **Machine Learning Paradigms with Examples (3/3)** + +- **Federated Learning** + - Train models across decentralized devices without sharing raw data + - E.g., fraud detection or credit scoring across banks + +- **Evolutionary Learning** + - Optimize model structures or parameters using evolutionary algorithms + inspired by natural selection and genetics + - Gradient free, global search, discrete structures, variable length inputs + - E.g., genetic algorithms + +- **Curriculum Learning** + - Train models on easier tasks first, gradually increasing difficulty + - E.g., curriculum-based training in robotic control simulations + +- **Multi-Agent Learning** + - Multiple agents learn and interact in shared environments, often in + game-theoretic settings (e.g., competition, collaboration) + - E.g., AlphaStar to play StarCraft II + +- **Supervised Learning** +- Learn a function $f: X \to Y$ that maps inputs to correct outputs using + training examples $(\vx, y)$ where inputs and correct output pairs are known + - Requires labeled data for training + - Measure performance with error on a separate test set + +- **Classification**: output is a discrete label, e.g., + - `Spam` vs `Not Spam` + - Digit recognition `0`, `1`, ... 
+ - Sentiment analysis `Pos`, `Neg`, `Neutral` + +- **Regression**: output is a continuous value, e.g., + - House prices given features like size and location + - House demand + - Stock prices + +- **Common algorithms**: + - Linear Regression + - Decision Trees + - K-nearest neighbors + - Neural Networks + - ... + +- **Unsupervised Learning** +- Learn from data **without** labeled outputs + - Goal: discover patterns, groupings, or structure in the data + - No explicit feedback signal + - Evaluation can be qualitative + +- **Main techniques**: + - **Clustering**: Group similar examples, e.g., + - Customer segmentation + - Grouping news articles by topic without knowing the topics + - **Dimensionality Reduction**: Reduce number of variables with PCA while + preserving structure + - E.g., visualize high-dimensional data in 2D + - **Density Estimation**: Estimate probability distribution of data + - E.g., anomaly detection in server logs + - **Association Rule Learning**: Discover interesting relations between + variables + - E.g., market basket analysis (e.g., "people who buy X also buy Y") + +- **Common algorithms:** + - K-means + - PCA + - Autoencoders + +- **Reinforcement Learning** +- Learn by **interacting with an environment** to **maximize cumulative reward** + - Learn policy $\pi(s) \to a$ that maximizes expected reward + - Trade-off between exploration (trying new actions) and exploitation (using + known good actions) + - Environments provide clear rules and feedback (win/loss/reward) + - Often involve physical simulation or real-world interaction + +::: columns +:::: {.column width=60%} +- **Core elements:** + - Agent: Learner and decision maker + - Environment: Everything the agent interacts with + - State $s$ + - Action $a$ + - Reward $r$ + +- **Algorithms:** + - Q-learning + - Policy Gradient methods +:::: +:::: {.column width=35%} +```graphviz +digraph BayesianFlow { + splines=true; + nodesep=1.0; + ranksep=0.75; + + node [shape=box, 
style="rounded,filled", fontname="Helvetica", fontsize=12, penwidth=1.4]; + + // Node styles + Agent [label="Agent", shape=box, fillcolor="#F4A6A6"]; + Env [label="Environment", shape=box, fillcolor="#B2E2B2"]; + + // Force ranks + //{ rank=same; Agent; Env; } + + // Edges + Agent -> Agent [label=" State", fontcolor=black, labeldistance=2.0]; + Agent -> Env [label=" Action", fontcolor=black, labeldistance=2.0]; + Env -> Agent [label=" Reward", fontcolor=black, labeldistance=2.0]; +} +``` +:::: +::: + +- **Reinforcement Learning: Examples** +- In game playing, learn strategies through trial and error + - E.g., AlphaGo mastering the game of Go +- In robotics, learn control policies for movement and manipulation +- In autonomous driving, learn safe and efficient driving behaviors +- In resource management, optimize allocation of limited resources over time + - E.g., data center cooling or CPU job scheduling +- In personalized recommendations, adapt suggestions based on user interaction + - E.g., newsfeed ranking adjusting based on user clicks +- In healthcare, optimize treatment plans over time + +# Techniques + +## Machine Learning in Practice + +- **Machine Learning Flow** +- **Question** + - E.g., "How can we predict house prices?" 
+- **Input data** + - E.g., historical data of house sales +- **Features** + - E.g., number of bedrooms, location, square footage +- **Algorithm** + - E.g., linear regression, decision trees +- **Parameters** + - E.g., learning rate, number of trees in a random forest +- **Evaluation** + - E.g., accuracy, precision, recall + +- **Machine Learning Flow** + +```graphviz[height=80%] +digraph BayesianFlow { + rankdir=LR; + splines=true; + nodesep=1.0; + ranksep=0.75; + node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12, penwidth=1.4]; + // Node styles + "Question" [fillcolor="#F4A6A6"]; + "Input data" [fillcolor="#FFD1A6"]; + "Features" [fillcolor="#B2E2B2"]; + "Algorithm" [fillcolor="#A0D6D1"]; + "Parameters" [fillcolor="#A6E7F4"]; + "Evaluation" [fillcolor="#A6C8F4"]; + // Force ranks + // Edges + "Question" -> "Input data"; + "Input data" -> "Features"; + "Features" -> "Algorithm"; + "Algorithm" -> "Parameters"; + "Parameters" -> "Evaluation"; +} +``` + +- **Not all phases are equally important!** + - Question $>$ Data $>$ Features $>$ Algorithm + +- Clarity of the question impacts project success +- Quality and relevance of data are crucial for performance +- Proper feature selection simplifies the model and improves accuracy +- Algorithm is often less important (contrary to popular belief!) + +- **Question** +- **Make the question concrete and precise** + - Define the problem clearly + - Specify inputs and expected outputs + - Align question with business or research objectives + - E.g.,: + - **Bad**: _"How can we improve sales?"_ + - **Good**: _"What factors most significantly impact sales of product X in + region Y during season Z?"_ + +- Formulating question is **the most important part** of the machine learning +problem + - Misunderstanding leads to: + - Solving the wrong problem + - Collecting wrong data + - ... 
+ +- _"If I were given one hour to save the planet, I would spend 59 minutes + defining the problem and one minute resolving it"_ (Albert Einstein) + +- **Input Data** +- Ensure **data is specific to prediction** goal + - E.g., use known movie ratings to predict unseen movie ratings from the same + population + - Training set $\approx$ test set + +- Relationship between data and prediction goal is **not always direct** + - E.g., interested in prices but predict supply and demand instead + +- Poor-quality data leads to inaccurate predictions + - _"Garbage in - garbage out"_ + +- Recognize **when data is insufficient** for valid answers + - _"Combination of data and desire for an answer does not ensure a reasonable + answer can be extracted"_ (John Tukey) + +- **More data vs better models** + - Meta-studies show difference between generic and best model is like + 5\% + - _"It's not who has the best algorithm that wins. It's who has the most + data"_ (Google researcher) + - _"Every time I fire a linguist, the performance of the speech recognizer + goes up"_ (IBM researcher in speech recognition) + +- **Features** +- **Features** provide high-level information about inputs + - E.g., use intensity and symmetry for scanned numbers instead of raw bit maps + +- **Characteristics of good features**: + 1. Enable data compression + 2. Retain relevant information + 3. Often created with expert knowledge + +- **Common mistakes in feature building**: + 1. Automating feature selection may lead to overfitting + - Black box predictions can be accurate but stop working anytime + - E.g., Google Flu's unclear feature-model link + 2. Ignoring data-specific quirks + - E.g., mislabeling outliers + 3. 
Unnecessarily discarding information + +- **Models** +- Best models are: + - **Interpretable** + - Allow users to understand and trust the model's decisions + - E.g., decision trees are appropriate in medical studies since they produce + a "reasoning" + - **Simple** + - Easier to implement and maintain + - Reduces the risk of overfitting + - **Accurate** + - Often accuracy is traded off for remaining characteristics + - E.g., accuracy vs interpretability, accuracy vs speed + - **Fast** + - To train and test + - Essential for real-time applications + - Reduces computational costs + - **Scalable** + - Can handle large datasets efficiently + - Important for growing data and user bases + - E.g., in the Netflix prize, Netflix didn't end up implementing the best + algorithm since it wasn't scalable enough + +## How to Do Research + +### Simple Is Better + +- **Occam'S Razor** +- _The **simplest** model that fits the data is also the **most plausible**_ + (Occam) + - Trim the model to the bare minimum necessary to explain the data + - _"An explanation of the data should be as simple as possible, but not + simpler"_ (Einstein?) 
+ - **Simple** means: + - Less likely to fit a given data by coincidence + - An unlikely event is more significant if it happens (formalized in terms + entropy) + - **Better** means better out of sample performance + +- An object is **simple** when it is one of few possible objects + - Polynomial of order 2 is simpler than a polynomial of order 17 + - There are many more polynomials of order 17 compared to order 2, although + both are infinite sets + - SVM (Support Vector Machine) characteristics: + - The separating hyperplane appears wiggly, but it is defined by a few + support vectors + - Complexity of a hypothesis $h$ + - E.g., polynomial order, MDL (describe the hypothesis in terms of bits), + Kolmogorov complexity + - Complexity of a hypothesis set $\calH$ + - E.g., VC dimension of the model + - Complexity of $h$ and $\calH$ are related by counting: if we need $l$ bits + to specify $h$, then $h$ is one of $2^l$ elements of a set $\calH$ + +- **Model Soundness** +- We cannot blindly accept the result of modeling + - A model should tell a story + - Always ask yourself: _"what criticisms would you give to the model if it was + presented to us for the first time?"_ + +- Benchmark models: what are the performance if the model outputs: + - Outputs always 0 or 1 + - E.g., long-only model for stock predictions + - Random results + - I.e., bootstrap of null hypothesis "there is no prediction power" + +- A perfect fit can mean nothing, e.g., + - Get 2 data points on a plane + - Fit data with a linear relationship + - It is a perfect fit + - This means nothing since: + - There is always a line between 2 points + - The data cannot falsify the hypothesis + - The model (line) is too complex for the data set (only 2 points) + +- **Sampling Bias** +- A model, when learning, sees the world in terms of the training data + - If data is sampled in a biased way, learning will produce a biased outcome + +- Formally: one of the few hypothesis of Hoeffding in learning theory is that 
+ training and testing distributions are the same + +- Addressing sampling bias + - Weight or re-sample data to match testing distribution + - If data points have zero probability ($\Pr = 0$) in the data set, no + remedies are possible + +- **Data Snooping** +- **Data snooping** is the improper use of data that biases ML model results + - Common trap for practitioners + +- **Sources** of data snooping + 1. Contamination of training and test sets + 2. Multiple testing issue + 3. If data affects any learning step (e.g., feature engineering, model + selection, hyperparameter tuning), its assessment becomes optimistic + +- **Effects** of data snooping + - Models show inflated performance metrics which do not translate out of + sample + - Snooping leads to seemingly better performance: + - It is a "happy minefield" + +- **"Burning the Test Set"** +- Repeatedly using the same data eventually leads to "success" + - The model starts fitting to specific data quirks + - The test set should not be used for training; this leads to over-optimism + - _"If you torture the data long enough, it will confess whatever you want"_ + +- Solutions: + - Use the test set _exactly once_ + - The VC dimension applies to the overall learning model, including all + attempted models + - MDL accounts for the number of fitting attempts in overfitting measurement + - Adjust p-values for multiple experiments + +### Research Methodology + +- **How to Achieve Out-Of-Sample Fit** +- Goal: choose an hypothesis $g$ approximates the unknown target hypothesis $f$ + + $$ + g \approx f \iff E_{out}(g) \approx 0 + $$ + +- Solution: + - Achieve + 1. Good in-sample performance $E_{in}(g) \approx 0$ + 2. Good generalization $E_{out}(g) \approx E_{in}(g)$ + - Then 1. + 2. 
$\implies$ good out-of-sample performance + $E_{out}(g) \approx 0$ + +- **What to Do If Out-Of-Sample Fit Is Poor?** +- The model performs well in sample ($E_{in} \approx 0$) but poorly out of + sample ($E_{out} \gg E_{in}$) + +- What does it mean? + - The in-sample performance are optimistic + - The model is overfitted and fails to generalize + +- What do we do? + - Run diagnostics before embarking in long term projects + - Gain insight on what works / does not work to understand how to improve + performance + - E.g., bias-variance curves and learning curves + +- How to fix? + - It depends on the diagnostics! + 1. Training data + - Get more training data (it can take long time) $\iff$ fixes high variance + 2. Features + - Remove features $\iff$ fixes high variance + - Add more features (it can take long time) $\iff$ fixes high bias + - Add derived features (e.g., polynomial features) $\iff$ fixes high bias + 3. Regularization + - Decrease regularization amount $\lambda$ $\iff$ fixes high bias + - Increase regularization amount $\lambda$ $\iff$ fixes high variance + +- **Why Using a Lot of Data?** +- Several studies show that: + - Different algorithms/models have remarkably similar performance + - Increasing training set improves performance + +- Thus it holds that: + + $$ + \text{High capacity model + massive training set = good performance} + $$ + +- Using a high capacity model with many parameters (e.g., neural network) + $$ + E_{in} \approx 0 + $$ + due to low bias (and high variance) +- A massive data set helps avoid overfitting + $$ + E_{out} \approx E_{in} + $$ +- These two conditions together + + $$ + E_{out} \approx E_{in} \approx 0 \implies E_{out} \approx 0 + $$ + +- **What to Do When One Has Lots of Data?** +- You have $m$ = 100M examples in data set, what do you do? + +- Training on a lot of data might yield scalability issue: + - Slow + - Lots of compute + - Require work on infrastructure + - ... 
+ +- Plot the learning curves as function of increasing + $m = 1k, 10k, 100k, 1M, ...$ + - If the algorithm has large bias, it converges (training and validation + performance are similar) at $m = 1000$ + - Add more features and complicate the model rather than training on 100M + instances + - If the variance is large, use all instances to train the model + +- **Why We Do Things?** +- Always + - Ask: _"Why are we doing something?"_ + - To understand the purpose of the task + - Ask: _"What do we hope to determine by performing the task?"_ + - To clarify goals and outcomes of the task + - Encourage thinking about actions with the bigger picture in mind + - Avoid merely going through motions + - Promote critical thinking and awareness + - Prioritize tasks by importance and impact + +- E.g., when conducting a customer survey, ask: + - _"Why is feedback being collected?"_ + - To improve product features and customer service + - _"What is the desired outcome?"_ + - To identify areas for improvement and innovation + +- E.g., before starting a marketing campaign, ask: + - _"Why is this campaign run?"_ + - To increase brand awareness or drive sales + - _"What are the specific goals?"_ + - Set target number of new leads or click-through rates + +- **Summary of the Results, Next Steps, Follow Ups** +- Always have a summary of the results + - It's like a high-level map of what we have done and what we have discovered + - E.g., "smoothing model coefficients helps" + - Highlight major findings + - Interpret the results + - E.g., _"The increase in sales is likely due to the new marketing + strategy."_ + - Conclusions + - Summarize what the data suggests or confirms + - E.g., _"Our hypothesis that user engagement increases retention is + supported"_ + +- Always have a reference to more detailed results + - Provide quick insights before diving into details + +- Always have next steps / follow-ups + - What do you expect that will happen? + - What results do you expect? 
+ - Like thinking $n$ moves ahead in chess + - E.g., _"Next, we will conduct a detailed analysis on the demographics + contributing most to sales growth"_ + - Outline potential experiments or analyses to validate findings further + +- **Example of Spam Filter Classification** +- We use $N = 4$ words in an email to distinguish spam from non-spam emails + using logistic regression + - Words can be: `buy`, `now`, `deal`, `discount`, `` + +- How to improve the performance of this classifier? + 1. Collect more data + - E.g., honeypot project: set up fake email account and collect spam + 2. Use better features + - E.g., email routing information: spammers use unusual accounts and mask + emails as legitimate + 3. Use better features from message body + 4. Detect intentional misspellings + - Spammers use misspelled words (e.g., `w4tch` for `watch`) to confuse the + classifier + - Use stemming software + +- **Right and Wrong Approach to Research** + +- **Bad** + 1. It is not clear how to prioritize the different possible tasks + 2. Use gut feeling and pick one task + 3. Complete the task + 4. Re-evaluate performance + +- **Good** + 1. Build a simple algorithm + - Within 1 day + 2. Set up the performance evaluation framework + - A single number and bounds to evaluate + - Aim to improve that number + - Evaluate with cross-validation + 3. Set up diagnostic tools + - Compute learning and bias-variance curves + - Avoid premature optimization by understanding the issue before fixing it + 4. Manually review misclassified emails in the cross-validated set + - What features might help to improve performance? + - E.g., what types of emails are misclassified? + +- Sometimes an approach must be tried to see if it works + - E.g., stemming software to consider certain words equivalent + +## Pipeline Organization + +- **How Are Machine Learning Systems Organized?** +- Machine learning systems are typically organized in a pipeline + 1. Break down the problem into sub-problems + 2. 
Solve problems one at a time
Artificial data + - E.g., synthesize or amplify data set + 2. Collect and label by hand + - E.g., crowd sourcing like Amazon Mechanical Turk + +- **OCR Pipeline: Example of Artificial Data Synthesis** +- How can we increase data set size? + 1. Synthesize data set + - Use font libraries to generate large training sets + - Paste characters against random backgrounds + - Apply scaling, distortion, adding noise, etc + 2. Amplify a data set + - Start from a training set and add examples by warping/distorting existing + examples + +- Transformations and noise should be specific to the application domain + - E.g., Gaussian noise is not always appropriate + +- **Ceiling Analysis for ML Pipeline** +- The most valuable resource is time + - Sometimes one works on an optimization for months + - The optimization doesn't make much difference + +- **Problem**: On which part of the pipeline should time/resource be spent? + +- **Solution**: Ceiling analysis + - Technique to analyze performance of pipelines + - Have a single number representing the performance of the entire system + - E.g., accuracy for an OCR system + - For each component: + - Mock the component with a box that always gives the correct output + (=oracle) + - Leave the remaining components untouched + - Compute performance of the entire pipeline + - Understand which component is critical to performance by estimating an upper + bound for overall performance when that component improves 10\% + - Don't trust your gut feeling but measure! + +## Input Processing + + +- Data cleaning +- Dimensionality reduction +- Feature engineering + +## Learning Algorithms + +### Gradient Descent + +- **The Problem of Minimizing a Function** + +::: columns +:::: {.column width=80%} +- **Goal**: minimize a function $J(\vw)$ + - E.g., in-sample error $E_{in}(\vw)$ + +- **Solutions**: + 1. Analytical solution + - Impose the gradient of $J(\vw)$ to equal 0 + - Find a closed-form solution for $\vw^*$ + 2. 
Numerical solution: + - Use an iterative method to update $\vw$ to reach the minimum value of + $J(\vw)$ + - E.g., gradient descent + - It works even if there is an analytical solution +:::: +:::: {.column width=20%} + ![](lectures_source/figures/Lesson6_Gradient_descent_2.png) +:::: +::: + +- **Gradient Descent: Intuition** + +::: columns +:::: {.column width=80%} +- **Problem**: + - We are on a hilly surface and we want to walk down to the bottom of the hill + +- **Solution**: + - At each point: + - We look around + - We move a step in the direction where the surface is steepest + - We keep doing until we reach the bottom + +- Gradient descent + - Is a general technique for minimizing a twice-differentiable function + - Converges to + - A local minimum in general + - The global minimum if $J(\vw)$ is convex (e.g., logistic regression and + linear models) +:::: +:::: {.column width=20%} +![](lectures_source/figures/Lesson6_Gradient_descent_1.png) +:::: +::: + +- **Gradient descent with fixed learning rate (1/3)** +- Consider the contour plot of a function + +- Start from a point $\vw(0)$ (random, the origin, ...) 
+ +- At each step, move a fixed amount $\eta$ in the weight space (fixed learning + rate): + + $$ + \vw(t + 1) = \vw(t) + \eta \hat{\vvv} + $$ + + where $\hat{\vvv}$ is a unit vector + +- Pick $\hat{\vvv}$ to move to a value of $E_{in}(\vw)$ as negative as possible + + - The change for $E_{in}$ is: + \begingroup \footnotesize + \begin{align*} + \Delta E_{in} + & = E_{in}(\vw(t + 1)) - E_{in}(\vw(t)) + & \\ + & = E_{in}(\vw(t) + \eta \hat{\vvv}) - E_{in}(\vw(t)) + & \text{ (replacing the expression of $\vw(t + 1)$)} \\ + & = \eta \nabla E_{in}(\vw(t))^T \hat{\vvv} + O(\eta ^ 2) + & \text{ (using Taylor expansion)} \\ + \end{align*} + \endgroup + - Gradient descent keeps only $O(\eta)$ the term and ignores the rest + - Conjugate gradient considers up to $O(\eta^2)$ and ignores higher + infinitesimals + +- **Gradient Descent with Fixed Learning Rate (2/3)** + +- The minimal value of the scalar product + - Is $- \eta \|\nabla E_{in}(\vw(t))\|$, + - Happens when + $\hat{\vvv} = - \frac{\nabla E_{in}(\vw(t))}{\|\nabla E_{in}(\vw(t))\|}$ +- The change in weights is: + $$ + \Delta \vw = - \eta \frac{\nabla}{\|\nabla\|} + $$ +- It is called "gradient descent" since we descend along the gradient of the + function to optimize + +- **Gradient Descent with Fixed Learning Rate (3/3)** +- Each component of the weight $\vw$ is updated with the partial derivative with + respect to that coordinate: + \begin{align*} + \vw(t + 1) + &= \vw(t) - \eta \hat{v} \\ + \vw(t + 1) + &= \vw(t) - \eta \frac{\nabla E_{in}(\vw(t))}{\|\nabla E_{in}(\vw(t))\|} \\ + w_j(t + 1) + &= w_j(t) - \eta + \frac{1}{\|\nabla E_{in}(\vw(t))\|} + \frac{\partial E_{in}(\vw)}{\partial w_j} + \end{align*} + +- The update of all components should be simultaneous, i.e., computed at once + +- A step of the optimization when we update the solution (weights) is called + epoch + +- **Gradient Descent: Stopping Criteria** +- In theory, stop when $\Delta E_{in} = \vv{0}$ + - Numerically this might not occur +- In practice, 
stop when + - The variation of $E_{in}$ is smaller than a given threshold + $\Delta E_{in} < \theta$ + - We have reached a certain number of iterations + +- Monitoring gradient descent + - In theory, only need to compute the derivatives of the function $J(\vw)$ to + optimize + - In practice, need to monitor the algorithm progress by recomputing the cost + function $J(\vw)$ periodically to make sure it is decreasing + +- **Setting $\eta$ in Gradient Descent with Fixed Learning Rate** +- Consider a 1D convex function + - If $\eta$ is small: + - The linear approximation of $E_{in}$ is effective + - Many steps are needed to converge to the minimum + - If $\eta$ is large: + - The linear approximation fails (higher terms affect values) + - It "bounces around" + +![](lectures_source/figures/Lesson6_Gradient_descent_3.png) + +- Idea: vary learning rate $\eta$ during gradient descent + - Smaller learning rates may find a better minimum + - Reduce $\eta$ as a function of iterations + - Cons: this introduces an additional parameter to tune + +- **Gradient Descent with Variable Learning Rate** +- In gradient descent with fixed learning rate (i.e., constant change in weight + space), we use: + + $$ + \Delta \vw = - \eta \frac{\nabla J}{\|\nabla J\|} + $$ + +- To converge quickly, we want to: + - Move fast in weight space (large $\eta$) when the surface is steep (large + gradient) + - Move slow in weight space (small $\eta$) near the minimum to avoid bouncing + around (small gradient) + +- Ideally, $\eta$ should increase with the slope: $\eta \propto \|\nabla J\|$ + +- This is called gradient descent with variable learning rate: + + $$ + \Delta \vw = - \eta \nabla J + $$ + +- **Feature Scaling in Gradient Descent** +- Gradient descent converges faster if features are scaled to the same range + - Feature scaling techniques include min-max scaling and standardization + - E.g., applying standardization to a dataset can transform feature values to + have a mean of 0 and a standard 
deviation of 1 + +- Otherwise, different gradient components have different errors due to + numerical approximation, causing the gradient to bounce around + - Unscaled features can lead to slow and unstable convergence due to varying + magnitudes + - E.g., if one feature ranges from 1 to 1000 and another ranges from 0.01 to + 1, the large disparity can cause inefficient updates + +- **Issues with Batch Gradient Descent** +- Consider the case of squared error with $n$ samples + $$ + E_{in}(\vw) + = \frac{1}{n} \sum_i e(h_{\vw}(\vx_i) - y_i) + = \frac{1}{n} \sum_i (h_{\vw}(\vx_i) - y_i) ^ 2 + $$ +- The Batch Gradient Descent (BSD) requires to update each component of the + weight vector with an expression like: + $$ + \vw(t + 1) = \vw(t) - \eta \frac{\nabla E_{in}}{\|\nabla E_{in}\|} + $$ +- In terms of coordinates for squared error: + $$ + w_j(t + 1) = w_j(t) - + \eta \frac{2}{n} + \sum_{i=0}^n (h_{\vw}(\vx_i) - y_i) + \frac{\partial h_{\vw}(\vx_i)}{\partial w_j} + $$ +- With a large number of training examples (e.g., $N = 10^6$), gradient descent: + - Is computationally expensive as it requires evaluating the gradient from all + examples for a single update + - Requires storing all the data in memory + +### Stochastic Gradient Descent + +- **Stochastic Gradient Descent** +- **Idea** of Stochastic Gradient Descent (SGD) + - Update the weights only for one training example picked at random + +- **Algorithm** + - Pick one $(\vx_n, y_n)$ at a time from the available examples + - Compute $\nabla e(h(\vx_n), y_n)$ to update the weights: + $$ + \Delta \vw = -\eta \nabla e + $$ + - Update the weight considering only one random example: + $$ + w_j(t + 1) = w_j(t) - + \eta \frac{2}{n} + (h_{\vw}(\vx_t) - y_t) + \frac{\partial h_{\vw}(\vx_t)}{\partial w_j} + $$ + +- $\nabla e$ is a function of a random var $\vx_n$ + - The average direction of SGD is the same direction as batch version + $$ + \EE[\nabla e] + = \frac{1}{N} \sum \nabla e(h(\vx_n), y_n) + = \nabla \frac{1}{N} 
\sum e(h(\vx_n), y_n) + = \nabla E_{in} + $$ + +- In Stochastic Gradient Descent (SGD): + - The path in the weight space is more random + - It does not even converge but rather oscillates around the local minimum + +- **Mini-Batch Gradient Descent** +- Bring together characteristics of both Batch and Stochastic Gradient Descent + +- Use $b$ examples to make an update to the current weight + - $b$ represents the batch size, which is a hyperparameter you can choose + - A common choice for $b$ might be $b = 32$ or $b = 64$ + +- Mini-batch GD offers a balance between SGD noisiness and full-batch + approaches, using small, random data samples for updates + +- **On-Line Learning and Gradient Descent** +- Continuous stream of training examples requires updating the model + - In real-time systems, new data points arrive and the model adapts without + fully retraining + - E.g., in stock market prediction models, each transaction can dynamically + adjust model weights + - Handle variation in the dynamics of the underlying process + +- Stochastic gradient (SGD) and mini-batch descent are suitable for online + learning, updating the model one example at a time + +- Discard examples for a "compressed" model representation + - Useful for large data streams where storing every data point is impractical + - E.g., in training a language model on live chat data, older conversations + might be discarded after updates to maintain relevant patterns in the model + +- **SGD vs BGD vs Mini-Batch** +- To update the weights: + - BGD (batch gradient descent) uses all the training examples + - SGD (stochastic gradient descent) uses a single (random) training example + - Mini-batch GD uses only a subset of training examples + +\begingroup \scriptsize + +| **Aspect** | **Batch Gradient Descent** | **Stochastic Gradient Descent** | +| --------------- | ---------------------------------------- | ------------------------------------ | +| Computation | Uses all examples | One example at a time | +| 
Memory | Requires all examples in memory | Require less memory | +| Randomization | More likely to terminate in flat regions | Avoid local minima due to randomness | +| Regularization | No implicit regularization | Oscillations act as regularization | +| Parallelization | Can be parallelized | Less parallel-friendly | +| Online Learning | Not suitable | Suitable for online learning | + +\endgroup + +- **Map-Reduce for Batch Gradient Descent** +- In map-reduce we use $k$ machines to parallelize the summation (map step) and + then we send the $k$ partial sums to a single node to accumulate the result + (reduce step) +- Batch GD (and many learning algorithms) can be expressed in this map-reduce + form + + +- **Coordinate Descend** +- Minimize $J(x_0, ..., x_n)$ by optimizing along one direction $x_i$ at a time + - Instead of computing all derivatives + +- **Algorithm** + - Pick a random starting point $\vw(0)$ + - Pick a random order for the coordinates $\{ x_i \}$ + - Find the minimum along the current coordinate (1D optimization problem) + - Move to the next coordinate $x_{i+1}$ + - The sequence of $\vw(t)$ is decreasing + - A minimum is found if there is no improvement after one cycle of scanning + all coordinates + - The minimum is local + +- **Gradient Descent vs Pseudo-Inverse for Linear Models** +- For linear models we can use either pseudo-inverse or gradient descent to find + optimal $\vw^*$ + +- **Gradient descent** + - Choose learning rate $\eta$ + - Requires many iterations to converge + - Monitor stopping criteria, oscillations, etc + - Effective for many features $P$ + +- **Pseudo-inverse** + - No parameter selection needed + - Converges in one iteration (with nested loops) + - Computes $(\mX^T \mX)^{-1}$, a $P \times P$ matrix + - Inverse complexity $O(P^3)$ + - E.g., for $P \approx 10,000$, gradient descent is preferable + +## Performance Metrics + +- **How to Make Progress in ML Research** +- There are many possible directions for research + - 
Different features + - Different data preprocessing methods + - Different models + - Different training algorithms + - Different evaluation techniques + - Explore optimization strategies + +- What to do? + +- Approach + - Evaluate models systematically using a single number + - Implement metrics (E.g., accuracy, F1 score) for insight + - Use cross-validation for model validation + - Statistical tests to ensure differences are not random + - Utilize hypothesis testing for genuine improvements + - Conduct A/B testing for real-world validation + +- **How to Measure Classifier'S Performance?** +- Success / hit / win rate (or error / miss rate) + - Measures the proportion of correct predictions by the model + - Important for understanding overall accuracy + - E.g., in binary classification, 80 correct predictions out of 100 result in + an 80% success rate + +- Log probability / cross-entropy error + - Evaluates classification model with probabilities between 0 and 1 + - E.g., lower cross-entropy loss indicates better performance + +- **Precision / recall / F-score** + - Useful for evaluating models in imbalanced data scenarios + - Precision: ratio of correctly predicted positive observations to total + predicted positives + - E.g., a precision of 0.75 means 75% of identified positives are true + positives + - Recall: ratio of correctly predicted positive observations to actual + positives + - E.g., a recall of 0.60 means 60% of actual positives were correctly + identified + - F-score: weighted harmonic mean of precision and recall + +- **Utility function** + - Customizes the evaluation metric to prioritize types of errors and success + - E.g., true / false positives / negatives + - E.g., in medical diagnosis, a utility function might give higher weight to + minimizing false negatives to prevent missed diagnoses + +- **Training vs Test Set** +- Performance on train set $E_{in}$ is an optimistic estimate of $E_{out}$ + - One can have: + - 0\% error rate on training data 
(e.g., memorizing responses for training + set) + - 50\% error rate on test set (e.g., by answering randomly) + +- To evaluate model performance, use a test set that played no role in training + +- Training and test sets should be representative samples of the problem + - E.g., credit risk problem + - One cannot use data from a bank branch in Florida to assess a model built + with data from a bank branch in New York + - Characteristics of the populations are very different + +- **Lots of Data Scenario vs Scarce Data Scenario** +- **Lots of data scenario** + - Ideal to have lots of data (ideally infinite) + - Learn on lots of data + - Fit all degrees of freedom of a complex model + - Predict on lots of data + - Assess precise out-of-sample performance + +- **Scarce data scenario** + - Often data (especially data of high quality) is scarce + - E.g., facial recognition datasets with limited annotated data needing + careful management + - Cannot use all data as a training set + - Need to hold out data to estimate performance metrics and bounds + - Split the data 70-30 or 80-20 in train and test sets + - Consider cross-validation techniques to maximize data usage + - Other approaches: + - Augment data artificially, like data augmentation in image processing + - Utilize transfer learning with pre-trained models on related tasks + + +- **Splitting Data Into Training, Validation, Test Sets** +- Training, validation, and test sets must be: + - Distinct + - Representative of the problem + - E.g., each class in all sets must be represented according to the original + data + - Sized based on available data and problem needs + +- To ensure sets have the same distribution: + - Stratified sampling + - E.g., each class label is proportionally represented in each set + - Shuffle and then sample + - Achieves randomization, maintaining distribution + - Sample and check statistics of variables (e.g., mean, std dev, PDF) + - Compare these statistics to ensure each set mirrors the 
broader dataset + +- **Rule of Thumbs for Data Set Splits** +- If $n$ is **large** $\to$ use a 60-20-20 split + - Training: 60% + - Validation: 20% + - Test: 20% + +- If $n$ is **medium** $\to$ use a 60-40 split + - Training: 60% + - Test: 40% + - Not possible to learn hyperparameters, so no validation set + +- If $n$ is **small** $\to$ use cross-validation and report "small data size" + - Use K-fold cross-validation + - Be cautious of the increased chance of high accuracy by chance + - Is machine learning for the given sample size even suitable? + +- **Can We Ever Use Test Set as Training Set?** +- Once the model is selected and validated, reuse all available data (including + the test set) to generate the model for deployment + - This ensures the model benefits from all available information + +- Generally, more data is better, though returns diminish after exceeding a + certain volume + - Initially, increasing data size can significantly improve model performance + - Eventually, adding more data results in smaller accuracy gains and may not + justify the increased computational cost + +- **In-Sample vs Out-Of-Sample Error Expressions** +- We want to find a function $h$ that approximates the unknown function $f$, + $h \approx f$ over the space of inputs $\vx \in \calX$ ("script X") + +- The error is usually defined point-wise: + + $$ + e(h(\vx_i), f(\vx_i)) + $$ + - E.g., + - Squared error: $e(\vx) = (h(\vx) - f(\vx))^2$ + - 0-1 binary error: $e(\vx) = I[h(\vx) == f(\vx)]$ + - Log probability: $e(\vx) = - \log( \Pr(h(\vx) == f(\vx)) )$ + +- In-sample error is computed using all points in the training set: + + $$ + E_{in}{(h)} = \frac{1}{N} \sum_{i=1}^N e(h(\vx_i), f(\vx_i)) + $$ + +- Out-of-sample error is computed on the entire space of inputs $\calX$ + + $$ + E_{out}(h) = \EE_{\vx \in \calX}[e(h(\vx), f(\vx))] + $$ + +- **Mean Squared Error (MSE)** +- MSE is the average difference of squared error: + + $$ + \text{MSE} + \defeq \frac{1}{N} \sum_{i=1}^N (h(\vx_i) 
- f(\vx_i))^2 + $$ + - MSE measures the estimator quality, quantifying the difference between + estimated and actual values + - E.g., in a house price prediction model, MSE determines how close predicted + prices are to actual prices + +- **Cons:** + - It doesn't share the unit of measure with the output + - Distorts error interpretation; predicted and actual values are usually in + different units + - Sensitive to outliers + - A single large error can disproportionately affect the MSE + - Use median absolute deviation (MAD), median of squared error for + robustness against outliers + +- **Root Mean Squared Error (RMSE)** +- RMSE is the standard deviation of the Mean Squared Error (MSE): + + $$ + \text{RMSE} \defeq \sqrt{\text{MSE}} + = \sqrt{\frac{1}{N} \sum_{i=1}^N (h(\vx_i) - f(\vx_i))^2} + $$ + +- **Pros:** + - Same units as the output, allowing intuition of its magnitude compared to + the mean + - Facilitates comparison between different data sets or models since the + metric is normalized to the output's scale + +- **Cons:** + - Sensitive to outliers (like MSE) which can excessively affect the metric + - May not be suitable for ranking models when outliers or skewed distributions + are present + +- **Median-Based Metrics** +- We can use metric based on median (i.e., the 0.5 quantile of absolute error): + +- Median absolute deviation: + + $$ + \text{MAD} \defeq \text{median}_i(|h(\vx_i) - f(\vx_i)|) + $$ + +- Median squared error: + + $$ + \defeq \text{median}_i(|h(\vx_i) - f(\vx_i)|^2) + $$ + +- **How to Choose an Error Measure?** + +- Error measure depends on the **application** and should be **specified by the + "customer"**: + - The customer needs to define what constitutes an acceptable level of error + for their specific use case + - E.g., medical applications might have a low tolerance for errors, while a + recommendation system might have a higher tolerance + +- Otherwise, we can pick: + - A **plausible error measure**: + - E.g., squared error is 
commonly used when assuming Gaussian noise in the + data + - A **"friendly error" measure**: + - E.g., measures that allow for closed-form solutions simplify calculations + significantly + - Convex optimization-friendly measures ensure optimization algorithms find + the global minimum easily + +- **Error Measures: Fingerprint Verification Example** +- In fingerprint verification: + - Recognizing a valid fingerprint has no error + - Otherwise, it is a false positive or a false negative + +- Error weight depends on the application + - For the same problem in two set-ups, the error measure is the opposite + - For supermarket applications: + - False positives are minor (e.g., one more discount) + - False negatives are costly (e.g., annoyed customer, slow line) + - For CIA building access: + - False negatives are acceptable (triggers further security) + - False positives are disastrous + +### Precision and Recall + +- **Error Metrics for Skewed Classes** +- When classes are skewed (i.e., one class is very rare), accuracy can be + misleading + - Use metrics like confusion matrix, precision, and recall + +- Example: + - Train a classifier to distinguish tumors as: + - $y = 1$: malignant + - $y = 0$: benign + - Classifier's error rate is 1% (i.e., guess correctly 99% of the time) seems + good + - But only 0.5% of patients have cancer + - A trivial classifier that always outputs $y = 0$ has a 0.5% error rate! 
+ - Now a 1% error rate does not look good anymore + +- **Decision Matrix ::: Columns :::: {.Column Width=60%}** +- Aka confusion matrix + +- Typically $y = 1$ encodes the rare class to predict + +- Assuming actual and predicted class $\in \{0, 1\}$, we have 4 possible cases: + - $act = 1$, $pred = 1$: true positive (TP) + - $act = 0$, $pred = 0$: true negative (TN) + - $act = 1$, $pred = 0$: false negative (FN) (output $pred = 0$, but it is + wrong) + - $act = 0$, $pred = 1$: false positive (FP) (output $pred = 1$, but it is + wrong) + +- Aggregate decision matrix in precision and recall +:::: +:::: {.column width=35%} + +```tikz +% Draw matrix +\draw[thick] (0,0) rectangle (4,4); +\draw[thick] (0,2) -- (4,2); % horizontal middle +\draw[thick] (2,0) -- (2,4); % vertical middle + +% Labels for actual class +\node[rotate=90] at (-0.8,3) {act = 1}; +\node[rotate=90] at (-0.8,1) {act = 0}; + +% Labels for predicted class +\node at (1,4.3) {pred = 1}; +\node at (3,4.3) {pred = 0}; + +% Cell labels +\node at (1,3) {\textbf{TP}}; +\node at (3,3) {\textbf{FN}}; +\node at (1,1) {\textbf{FP}}; +\node at (3,1) {\textbf{TN}}; +``` +:::: +::: + +- **Precision vs recall** +- Assume that $y = 1$ encodes the rare event we want to detect + +- **Precision** measures how often there is a true positive _given that pred = + 1_ + + \begingroup \small + + $$ + \text{precision} + \defeq \Pr(\text{TP} | \text{pred == 1}) + = \frac{|\text{pred == 1} \land \text{act == 1}|}{|\text{pred == 1}|} + = \frac{\text{TP}}{\text{TP} + \text{FP}} + $$ + + \endgroup + +- **Recall** measures how often there is a true positive _given that act = 1_ + \begingroup \small + + $$ + \text{recall} + \defeq \Pr(\text{TP} | \text{act == 1}) + = \frac{\text{TP}}{|\text{act == 1}|} + = \frac{\text{TP}}{\text{TP} + \text{FN}} + $$ + + \endgroup + +- Both are conditional probability measuring the fraction of TP under different + circumstances: + - (Pre)cision: pred = 1 + - Rec(a)ll: act = 1 + +- Precision/recall are 
widely used in information retrieval + - E.g., a search engine: + - Returns 30 pages; only 20 are relevant $\implies$ precision = 20 / 30 = 2 + / 3 + - Fails to return another 40 relevant pages $\implies$ recall = 20 / + (40 + 20) = 20 / 60 = 1 / 3 + +- **Precision / Recall in Terms of Quality / Quantity** +- **Precision** + - Increasing precision means when we predict 1, we are more likely to be right + - E.g., in a spam email detection system, "precision is 90%" means 90% of + the emails marked as spam are actually spam + - A higher precision indicates fewer false positives + - Measures "quality" of prediction + +- **Recall** + - Increasing recall means we predict more instances when the outcome is 1 + - E.g., in a spam email detection system, "recall is 80%" indicates 80% of + all actual spam emails were correctly identified as spam + - A higher recall means fewer false negatives + - Measures "quantity" of prediction (coverage) + +- **Precision / recall for trivial classifiers** +- A classifier that outputs always the most common class 0 has: + \begin{alignat*}{3} + \text{precision} + & = 0 + & \text{(since TP = 0)} + \\ + \text{recall} + & = 0 + & \text{(since TP = 0)} + \\ + \end{alignat*} +- A classifier that outputs always the rare class 1 has: + \begin{alignat*}{3} + \text{recall} + & = 1 + & \text{(since FN = 0)} + \\ + \text{precision} + & \defeq \Pr(\text{TP} | \text{pred == 1}) + & \text{(by definition)} + \\ + &= \frac{\text{TP}}{\text{TP + FP}} + & \text{($TP + FP = n$ because} + \\ + &= \frac{\#(y = 1)}{n} + & \text{classifier always emits 1)} + \\ + &= \Pr(\text{pos}) \approx 0 + & \text{(the positive class is very rare)} + \\ + \end{alignat*} +- A trivial classifier has precision or recall close to 0 + +- **Trading Off Precision and Recall** +- In theory, we want to increase both precision and recall + +- In practice, modify the threshold of a probabilistic classifier to trade off + precision and recall in practice + +- E.g., use logistic 
regression to predict cancer: + - With a threshold = 0.5, the classifier has: + - Precision = $\frac{\text{TP}}{|\text{pred == 1}|}$ + - Recall = $\frac{\text{TP}}{|\text{act == 1}|}$ + - Increase the threshold $\implies$ output 1 only if more confident, i.e., + increase precision + - Decrease the threshold $\implies$ output 1 more often, decreasing the + chances of missing a possible case of cancer, i.e., increase recall + + +- **Precision-Recall: Pros / Cons** +- Pros: + - Give insight on the behavior of a classifier (e.g., confusion matrix) + - Avoid mistaking a trivial classifier for a good classifier + +- Cons: + - We have two different numbers, thus it is difficult to compare classifiers + to each other + - Solutions: F-score, AUC + +- **Precision-Recall Curves** +- **Aka ROC curves** + +- Plot the curve on a precision-recall plane: ($y =$ precision, $1 - x =$ + recall) to show the precision vs recall trade-off for a classifier + - E.g., changing the threshold of logistic regression + +- A curve higher than another means a better classifier, since for the same + recall we can get a higher precision + - The best classifier (precision = recall = 1) is in the top-right corner + +- The precision-recall plot can have different shapes, e.g., + - Diagonal (pure luck) + - Convex up (better than luck) + - Convex down (worse than luck) + + +- **Area Under the Curve** +- **AUC** is the area under the precision-recall curve + - Provides a robust metric by integrating over all thresholds + - Higher AUC indicates better performance in differentiating between classes + - AUC = 0.5 suggests no discriminative power, similar to random guessing, + - AUC closer to 1.0 indicates high performance + +- **Pros**: + - Single number summarizing classifier behavior, useful for comparing + different models + - Does not require selecting a threshold for performance calculation + - Can handle imbalanced datasets effectively + +- E.g., consider a classifier for medical diagnosis + - The 
AUC helps understand how well the model distinguishes between patients + with and without a disease across all thresholds + +- **F-Score** +- The F-score is the harmonic mean of precision and recall: + + $$ + \text{F-score} + \defeq \frac{2}{\frac{1}{P} + \frac{1}{R}} + = 2 \frac{P \cdot R}{P + R} + $$ + +- **Interpretation:** + - Trivial classifiers: $P = 0$ or $R = 0$ $\implies$ F-score = 0 + - Perfect classifiers: $P = R = 1$ $\implies$ F-score = 1 + - For F-score to be large, both $P$ and $R$ must be high + +- Why not just averaging $P, R$? + - A classifier that always outputs 1 has $R = 1$ and $P = 0$ + - $\frac{P + R}{2} = \frac{1}{2}$, while we prefer a low value (ideally 0) + +## Model Selection + +- **Model Selection Problem** +- Model selection chooses the best model from a set of candidates based on + performance + - Needed when multiple hypotheses can explain the data + +- Certain parameters are fixed, while others need to be picked, e.g., + - Set of features + - E.g., selecting a subset of features from a dataset with 100 variables + - Learning algorithms + - E.g., deciding how to train a neural network + - Model types + - E.g., linear regression model vs. Support Vector Machine (SVM) + - Model complexity + - E.g., models with polynomials of degree $d < 10$ + - Values of the regularization parameter + - E.g., trying different values like 0.01, 0.1, and 1.0 + +- Evaluate model accuracy, precision, and recall +- Perform cross-validation to assess model performance +- Consider computational cost + - E.g., a simple logistic regression is faster than a complex neural network + +- **Model Selection Process** + +1. Split data into $D_{train}, D_{val}, D_{test}$ + - Commonly: 60\% training, 20\% validation, 20\% test + - Like splitting 80\% training between two learning phases + +2. Given $N$ hypotheses, learn on $D_{train}$ to get $g_1, ..., g_N$ + +3. Evaluate hypotheses on $D_{val}$ estimating errors + $E_{val}^{(1)}, ..., E_{val}^{(N)}$ + +4. 
Pick model $g_m$ with minimum $E_{val}^{(m)}$
+
+5. Use test set $D_{test}$ to estimate fair performance of model $g_m$, i.e.,
+   $E_{test} \approx E_{out}$
+
+6. Retrain model with entire $D = D_{train} \cup D_{val} \cup D_{test}$ to get
+   final $g_m^{*}$
+
+
+- **Model Selection as Learning**
+- "Picking the model with smallest $E_{val}$" is a form of learning:
+  - Hypothesis set: $\{g_1, ... , g_N\}$
+  - Training set: $D_{val}$
+  - Pick the best model $g_m$
+
+- After model selection
+  - Experimentally $E_{val}(g_m) < E_{out}(g_m)$, i.e., $E_{val}(g_m)$ is an
+    (optimistically) biased estimate of $E_{out}(g_m)$
+  - Theoretically:
+    - The penalty for model complexity with a finite set of hypotheses is
+      $$
+      E_{out}(g_m) \le E_{val}(g_m) + O(\sqrt{\log(N / K)})
+      $$
+    - Use VC dimension for an infinite number of hypotheses (e.g., choice of
+      $\lambda$ for regularization)
+
+
+## Aggregation
+
+- **Ensemble Learning: Intuition**
+- Ensemble learning combines multiple models to improve prediction accuracy
+  - **Idea**: a group of weak learners can form a strong learner
+
+- Combine outputs of models $X_i$ to build a model $X^*$ better than any $X_i$,
+  with the wisdom of all
+  - Utilizes diversity in model predictions to improve accuracy
+  - Each model contributes its unique perspective, reducing overfitting
+  - E.g., like a panel of voting experts
+
+- Example: in computer vision detecting a face is a difficult task (at least
+  circa 2010)
+  - Look for different features:
+    - Are there eyes?
+    - Is there a nose?
+    - Are eyes and nose in the correct position?
+    - ... 
+ - Each feature is weak per-se, but together they become reliable + +- **Ensemble Learning: Different Techniques** +- **Bagging** (bootstrap + aggregation) + - Reduces variance by averaging predictions from different models + - E.g., decision trees $\to$ bagging $\to$ random forest + - Bagging creates multiple versions of a decision tree (each trained on a + random sample of data) + - Average their predictions to improve accuracy + +- **Boosting** + - Reduces bias by focusing on errors made by previous models + - Sequentially adds models, each correcting its predecessor + - E.g., `adaBoost` increases weights of incorrectly classified data points to + learn the next model + +- **Stacking** + - Uses a meta-model to combine separate models using weights + - E.g., a stacking ensemble + - Uses a logistic regression as a meta-model + - Combines the predictions of other models (e.g., decision trees, support + vector machines, and neural networks) + +- **Ensemble Learning: Relation with Statistics** +- **Bagging** + - Improves performance by adding randomized variants (mimicking multiple + training sets) + - Reduce variance without affecting bias + +- **Boosting** + - Use another model to learn residuals, i.e., difference between predicted and + true values + - Related to the statistical technique of "forward stagewise additive models" + +- **Stacking** + - If we have 3 independent classifiers, each with $\Pr(\text{correct}) = 0.7$ + \begin{alignat*}{2} + \Pr(\text{majority correct}) + &= \Pr(\text{at least 2 classifiers correct}) \\ + &= {3 \choose 2} 0.7^2 0.3 + 0.7^3 \\ + &= 3 \times 0.7^2 \times 0.3 + 0.7^3 \\ + &\approx 0.78 > 0.7 + \end{alignat*} + +- **Ensemble learning: pros and cons** +- **Pros** + - Hypothesis set $\calH$ is increased by combining hypotheses from different + models + +- **Cons** + - More computationally intensive to train and evaluate + - Loss of interpretability + - Risk of overfitting (model complexity is increased) + - Ensemble learning 
contradicts Occam's razor, which advocates simplicity + +- **When Ensemble Learning Works** +- Combining multiple models with ensemble learning works when models: + - Are very different from each other + - Treat a reasonable percentage of the data correctly + - E.g., one cannot do much if all classifiers have 50% accuracy + - Complement each other: they are specialists in a part of the domain where + the others don't perform well + +- **How to Combine Outputs in Ensemble Learning** +- **Regression** + - Weighted average of prediction + - E.g., by accuracy of each model or by a prior + +- **Classification** + - Weighted vote of predicted classes + - It needs an odd number of models to break ties + +- **Probabilistic classification** + - Weighted average of class probabilities + +- We can also learn a meta-learner (stacking) to combine multiple models + +### Bagging + +- **Bagging** +- Bagging stands for "Bootstrap AGGregation" + +- **Learning procedure** + - Several training datasets are extracted randomly by sampling with + replacement from the original dataset (i.e., bootstrap) + - Learn multiple models, one for each training set + - Combine outputs using various methods + - Result is a better model than a single model + +- **Why bagging works?** + - From the bias-variance decomposition view, combining multiple models: + - Reduces the variance component + - Without compromising the bias (bagged models are typically unbiased) + - Bagging mimics extracting more training sets (though not independent) from + the unknown distribution + +- **Bagging and Instability in Learning Algorithms** +- Bagging works best with different models, especially non-linear models + +- Introduce randomization in the learning algorithm intentionally + +- **Decision Trees** + - Disable pruning + - Break ties randomly when selecting the best attribute to split + - E.g., bagging trees results in random forests + +- **Multilayer Perceptrons** + - Use different initial weights in 
backpropagation to reach different local + minima + +- **Nearest Neighbor Classifier** + - Use a random subset of features + - Resampling the training set has limited impact, as it is equivalent to + changing example weights + +### Boosting + +- **Boosting** +- Boosting builds models that complement each other + - Typically use homogeneous models, i.e., parametrized models from $\calH$ + +- Strong classifiers can be built from weak classifiers + - E.g., decision stumps = decision trees with one level + +- Statistical meaning of boosting: + - Boosting implements forward stagewise additive modeling + - Use another model to learn residuals (difference between predicted and true + values) + +- Boosting does not work for linear regression: + - Combination of linear models is still a linear model + - OLS finds optimal weights in one step + - Combining linear regressions from different attributes is equivalent to a + single multiple linear regression + +- **Adaboost.M1** +- Widely used for classification +- Assume examples can be weighted in the cost function used to learn + - Otherwise use resampling + +- **Learning procedure** + - Start with equal weights for all examples + - Iterate: + - Learn a classifier based on current weights for examples + - Weight the answer of each model by overall score (e.g., accuracy) or + probability + - Evaluate the ensemble + - Adjust weights for examples classified correctly/incorrectly + +### Stacking + +- **Stacking** +- Stacking learns how to combine models (not necessarily of the same type) + +- The problem is that with voting / averaging we don't know which model to trust +- Instead of voting or weighting we can use a meta-learner (level 1) to learn + how to pick / mix models (level 0) + +- **Learning procedure** + - Learn "level 0" models + - Learn "level 1" model using hold-out data from learning of level 0 models + (like in model selection) + - Build training data with predicted values from level 0 models + - Then learn level 1 + 
- Use a simple model for level 1 (e.g., linear models or trees) to avoid + overfitting + - Use probabilities from level 0, so level 1 can assess the confidence of + each model + +- **Boosting vs Bagging vs Stacking** + +\begingroup \scriptsize + +| **Aspect** | **Bagging** | **Boosting** | **Stacking** | +| ------------------------ | ----------------------------------- | ---------------------------------- | ---------------------------------------- | +| **Combines** | Models of the same type | Models of the same type | Models of different types | +| | | | +| **Learning** | Models trained independently | Iterative training | Models trained independently | +| **Predicting** | Uses uniform or data-driven weights | Uses learned weights from training | Uses learned weights or confidence | +| **Main Objective** | Reduce variance | Reduce bias | Improve generalization through diversity | +| **Base Learners** | Often strong learners | Often weak learners | Any model type (heterogeneous ensemble) | +| **Sensitivity to Noise** | Low | High | Medium | +| **Parallelizable** | Yes | No (sequential dependency) | Partially (base models parallelized) | +| **Meta-model** | Not used | Not used | Required | +| | | | +| **Examples** | Random Forest | AdaBoost, Gradient Boosting | Stacked Generalization, Blending | + +\endgroup diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index 3f769fd9f..d28d2a5de 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -190,7 +190,7 @@ def test_process3(self) -> None: # Good - - Good Time Management + - Good time management 1. 
Choose the right tasks - Avoid non-essential tasks @@ -245,13 +245,13 @@ def test_process_prettier_bug1(self) -> None: txt = self._get_text_problematic_for_prettier1() actual = hdocexec.prettier_on_str(txt, file_type="txt") expected = r""" - - Python Formatting + - Python formatting * Python has several built-in ways of formatting strings 1. `%` format operator 2. `format` and `str.format` - - `%` Format Operator + - `%` format operator * Text template as a format string - Values to insert are provided as a value or a `tuple` @@ -336,7 +336,9 @@ def _helper_process( file_name = os.path.join(self.get_scratch_space(), file_name) actual = dshdlino._process(txt, file_name) if expected: - expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=True) + expected = hprint.dedent( + expected, remove_lead_trail_empty_lines_=True + ) self.assert_equal(actual, expected) return actual diff --git a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py index 1d6646211..cdfd4d8e6 100644 --- a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py +++ b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py @@ -16,182 +16,15 @@ _LOG = logging.getLogger(__name__) -# TODO(gp): Pass through the function and not only executable. -def _run_preprocess_notes(in_file: str, out_file: str) -> str: - """ - Execute the end-to-end flow for `preprocess_notes.py` returning the output - as string. - """ - exec_path = hgit.find_file_in_git_tree("preprocess_notes.py") - hdbg.dassert_path_exists(exec_path) - # - hdbg.dassert_path_exists(in_file) - # - cmd = [] - cmd.append(exec_path) - cmd.append(f"--input {in_file}") - cmd.append(f"--output {out_file}") - cmd.append("--type pdf") - cmd_as_str = " ".join(cmd) - hsystem.system(cmd_as_str) - # Check. 
- actual = hio.from_file(out_file) - return actual # type: ignore - - -# ############################################################################# -# Test_process_color_commands1 -# ############################################################################# - - -class Test_process_color_commands1(hunitest.TestCase): - def test_text_content1(self) -> None: - """ - Test with plain text content. - """ - txt_in = r"\red{Hello world}" - expected = r"\textcolor{red}{\text{Hello world}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_math_content1(self) -> None: - """ - Test color command with mathematical content. - """ - txt_in = r"\blue{x + y = z}" - expected = r"\textcolor{blue}{x + y = z}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_multiple_colors1(self) -> None: - """ - Test multiple color commands in the same line. - """ - txt_in = r"The \red{quick} \blue{fox} \green{jumps}" - expected = r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_mixed_content1(self) -> None: - """ - Test color commands with both text and math content. - """ - txt_in = r"\red{Result: x^2 + y^2}" - expected = r"\textcolor{red}{Result: x^2 + y^2}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_nested_braces1(self) -> None: - """ - Test color command with nested braces. 
- """ - txt_in = r"\blue{f(x) = {x + 1}}" - expected = r"\textcolor{blue}{f(x) = {x + 1}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_colorize_bullet_points1 -# ############################################################################# - - -@pytest.mark.skip(reason="Broken for now") -class Test_colorize_bullet_points1(hunitest.TestCase): - def helper(self, txt_in: str, expected: str) -> None: - """ - Test colorize bullet points. - """ - txt_in = hprint.dedent(txt_in) - actual = hmarkdo.colorize_bullet_points(txt_in) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test colorize bullet points. - """ - txt_in = r""" - - **VC Theory** - - Measures model - - - **Bias-Variance Decomposition** - - Prediction error - - **Bias** - - **Variance** - - - **Computation Complexity** - - Balances model - - Related to - - E.g., Minimum - - - **Bayesian Approach** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **Problem in ML Theory:** - - Assumptions may not align with practical problems - """ - expected = r""" - - **\red{VC Theory}** - - Measures model - - - **\orange{Bias-Variance Decomposition}** - - Prediction error - - **\yellow{Bias}** - - **\lime{Variance}** - - - **\green{Computation Complexity}** - - Balances model - - Related to - - E.g., Minimum - - - **\teal{Bayesian Approach}** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **\cyan{Problem in ML Theory:}** - - Assumptions may not align with practical problems - """ - self.helper(txt_in, expected) - - -# ############################################################################# -# Test_preprocess_notes1 -# ############################################################################# 
- - -@pytest.mark.skipif( - hserver.is_inside_ci() or hserver.is_dev_csfy(), - reason="Disabled because of CmampTask10710", -) -class Test_preprocess_notes1(hunitest.TestCase): - """ - Test `preprocess_notes.py` using the executable and checked in files. - """ - - def test1(self) -> None: - self._helper() - - def _helper(self) -> None: - # Set up. - in_file = os.path.join(self.get_input_dir(), "input1.txt") - out_file = os.path.join(self.get_scratch_space(), "output.txt") - # Run. - actual = _run_preprocess_notes(in_file, out_file) - # Check. - self.check_string(actual) - - # ############################################################################# # Test_process_question1 # ############################################################################# -@pytest.mark.skipif( - hserver.is_inside_ci() or hserver.is_dev_csfy(), - reason="Disabled because of CmampTask10710", -) +# @pytest.mark.skipif( +# hserver.is_inside_ci() or hserver.is_dev_csfy(), +# reason="Disabled because of CmampTask10710", +# ) class Test_process_question1(hunitest.TestCase): """ Check that the output of `preprocess_notes.py` is the expected one calling @@ -201,45 +34,43 @@ class Test_process_question1(hunitest.TestCase): def test_process_question1(self) -> None: txt_in = "* Hope is not a strategy" do_continue_exp = True - expected = "- **Hope is not a strategy**" - self._helper_process_question(txt_in, do_continue_exp, expected) + exp = "- **Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, exp) def test_process_question2(self) -> None: txt_in = "** Hope is not a strategy" do_continue_exp = True - expected = "- **Hope is not a strategy**" - self._helper_process_question(txt_in, do_continue_exp, expected) + exp = "- **Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, exp) def test_process_question3(self) -> None: txt_in = "*: Hope is not a strategy" do_continue_exp = True - expected = "- **Hope is not a strategy**" - self._helper_process_question(txt_in, 
do_continue_exp, expected)
+        exp = "- **Hope is not a strategy**"
+        self.helper(txt_in, do_continue_exp, exp)
 
     def test_process_question4(self) -> None:
         txt_in = "- Systems don't run themselves, they need to be run"
         do_continue_exp = False
-        expected = txt_in
-        self._helper_process_question(txt_in, do_continue_exp, expected)
+        exp = txt_in
+        self.helper(txt_in, do_continue_exp, exp)
 
     def test_process_question5(self) -> None:
         space = " "
         txt_in = "*" + space + "Hope is not a strategy"
         do_continue_exp = True
-        expected = "-" + space + "**Hope is not a strategy**"
-        self._helper_process_question(txt_in, do_continue_exp, expected)
+        exp = "-" + space + "**Hope is not a strategy**"
+        self.helper(txt_in, do_continue_exp, exp)
 
     def test_process_question6(self) -> None:
         space = " "
         txt_in = "**" + space + "Hope is not a strategy"
         do_continue_exp = True
-        expected = "-" + " " * len(space) + "**Hope is not a strategy**"
-        self._helper_process_question(txt_in, do_continue_exp, expected)
+        exp = "-" + " " * len(space) + "**Hope is not a strategy**"
+        self.helper(txt_in, do_continue_exp, exp)
 
-    def _helper_process_question(
-        self, txt_in: str, do_continue_exp: bool, expected: str
-    ) -> None:
-        do_continue, actual = dshdprno._process_question_to_markdown(txt_in)
+    def helper(self, txt_in: str, do_continue_exp: bool, exp: str) -> None:
+        do_continue, act = dshdprno._process_question_to_markdown(txt_in)
         self.assertEqual(do_continue, do_continue_exp)
-        self.assert_equal(actual, expected)
+        self.assert_equal(act, exp)
 @@ -249,17 +80,19 @@ def _helper_process_question(
 # #############################################################################
 
 
-@pytest.mark.skipif(
-    hserver.is_inside_ci() or hserver.is_dev_csfy(),
-    reason="Disabled because of CmampTask10710",
-)
-class Test_preprocess_notes3(hunitest.TestCase):
+# @pytest.mark.skipif(
+#     hserver.is_inside_ci() or hserver.is_dev_csfy(),
+#     reason="Disabled because of CmampTask10710",
+# )
+class Test_preprocess_notes_end_to_end1(hunitest.TestCase):
     """
-    Check 
that the output of `preprocess_notes.py` is the expected one calling
-    the library function directly.
+    Test `preprocess_notes.py` by calling the library function directly.
     """
 
     def test_run_all1(self) -> None:
+        """
+        Test type_="pdf".
+        """
         # Prepare inputs.
         txt_in = r"""
         # #############################################################################
@@ -310,5 +143,86 @@ def _is_integer(value):
             print(v)
         ```
         """
-        expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=True)
-        self.assert_equal(actual, expected)
+        exp = hprint.dedent(exp, remove_lead_trail_empty_lines_=True)
+        self.assert_equal(act, exp)
+
+    def test_run_all2(self) -> None:
+        """
+        Test type_="slides".
+        """
+        # Prepare inputs.
+        in_file = os.path.join(self.get_input_dir(), "input.txt")
+        # Read the notes from disk: `_transform_lines()` expects the file
+        # content, not the path (dedenting the path string was a no-op bug).
+        txt_in = hio.from_file(in_file)
+        # Run function.
+        type_ = "slides"
+        act = dshdprno._transform_lines(txt_in, type_, is_qa=False)
+        # Check.
+        self.check_string(act)
+
+
+# #############################################################################
+# Test_preprocess_notes_executable1
+# #############################################################################
+
+
+@pytest.mark.skipif(
+    hserver.is_inside_ci() or hserver.is_dev_csfy(),
+    reason="Disabled because of CmampTask10710",
+)
+class Test_preprocess_notes_executable1(hunitest.TestCase):
+    """
+    Test `preprocess_notes.py` using the executable and checked in files.
+    """
+
+    @staticmethod
+    def helper(in_file: str, out_file: str, type_: str) -> str:
+        """
+        Execute the end-to-end flow for `preprocess_notes.py` returning the output
+        as string.
+        """
+        hdbg.dassert_path_exists(in_file)
+        # Find executable.
+        exec_path = hgit.find_file_in_git_tree("preprocess_notes.py")
+        hdbg.dassert_path_exists(exec_path)
+        # Prepare command.
+        cmd = []
+        cmd.append(exec_path)
+        cmd.append(f"--input {in_file}")
+        cmd.append(f"--output {out_file}")
+        cmd.append(f"--type {type_}")
+        cmd_as_str = " ".join(cmd)
+        # Run. 
+ hsystem.system(cmd_as_str) + # Check. + act = hio.from_file(out_file) + return act # type: ignore + + def test1(self) -> None: + # Prepare inputs. + in_file = os.path.join(self.get_input_dir(), "input1.txt") + out_file = os.path.join(self.get_scratch_space(), "output.txt") + type_ = "pdf" + # Run. + act = self.helper(in_file, out_file, type_) + # Check. + self.check_string(act) + + def test2(self) -> None: + # Prepare inputs. + in_file = os.path.join(self.get_input_dir(), "input1.txt") + out_file = os.path.join(self.get_scratch_space(), "output.txt") + type_ = "pdf" + # Run. + act = self.helper(in_file, out_file, type_) + # Check. + self.check_string(act) + + def test3(self) -> None: + # Prepare inputs. + in_file = os.path.join(self.get_input_dir(), "input1.txt") + out_file = os.path.join(self.get_scratch_space(), "output.txt") + type_ = "pdf" + # Run. + act = self.helper(in_file, out_file, type_) + # Check. + self.check_string(act) diff --git a/dev_scripts_helpers/documentation/test/test_render_images.py b/dev_scripts_helpers/documentation/test/test_render_images.py index e60a1486b..ff791189d 100644 --- a/dev_scripts_helpers/documentation/test/test_render_images.py +++ b/dev_scripts_helpers/documentation/test/test_render_images.py @@ -103,7 +103,10 @@ def test2(self) -> None: image code type. """ # Prepare inputs. - image_code = "digraph { B -> A }" + image_code = """ + graph TD + B --> A + """ image_code_idx = 1 image_code_type = "mermaid" template_out_file = os.path.join(self.get_scratch_space(), "test.md") diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py index f55e0c7a5..e9250ba7f 100755 --- a/dev_scripts_helpers/llms/ai_review.py +++ b/dev_scripts_helpers/llms/ai_review.py @@ -80,9 +80,10 @@ def _main(parser: argparse.ArgumentParser) -> None: ) # Run post-transforms outside the container. 
if not args.skip_post_transforms: + compare = False out_txt = dshlllut.run_post_transforms( args.prompt, - args.compare, + compare, in_file_name, tmp_in_file_name, tmp_out_file_name, diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 7f0ae299c..46d0e5dc1 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -83,6 +83,7 @@ def _parse() -> argparse.ArgumentParser: return parser +# TODO(gp): Make it public and move it to `hdockerized_executables.py`. def _run_dockerized_llm_transform( in_file_path: str, cmd_opts: List[str], diff --git a/docs/code_guidelines/all.coding_style_guidelines.reference.md b/docs/code_guidelines/all.coding_style_guidelines.reference.md index 263ff527e..7ddf92eda 100644 --- a/docs/code_guidelines/all.coding_style_guidelines.reference.md +++ b/docs/code_guidelines/all.coding_style_guidelines.reference.md @@ -414,9 +414,6 @@ - Use `isinstance()` instead of `type()` to check the type of an object - Good: `if isinstance(obj, str):` - Bad: `if type(obj) == str:` -- Do not use `import *` - - Good: `from math import sqrt, pi` - - Bad: `from math import *` - Do not use `from ... 
import ...`, unless it is the `typing` package, e.g., `from typing import Iterable, List` - Good: `from typing import Dict, Tuple` diff --git a/docs/tools/all.ai_review.how_to_guide.md b/docs/tools/all.ai_review.how_to_guide.md index 16fa10013..989ed704d 100644 --- a/docs/tools/all.ai_review.how_to_guide.md +++ b/docs/tools/all.ai_review.how_to_guide.md @@ -27,26 +27,26 @@ - Apply modifications from a `cfile` to a set of files - E.g., from linter and AI review - Add TODOs from a `cfile` to Python or markdown files - - Apply a set of transformations to an entire file + - Apply a set of transformations to an entire Python file - E.g., styling / formatting code - Rewrite an entire markdown to fix English mistakes without changing its structure - - Reformat an entire markdown or Python using LLMs or code + - E.g., styling / formatting a markdown -- You should always commit your code before applying automatic transforms (e.g., - linting) +- You should always commit your code before applying automatic transforms, in the + same way that we run the `linter` on a clean tree - In this way, modifying a file is a separate commit and it's easy to review # Use templates -- We use templates for code and documentation to show and describe how a - document or code should look like, e.g., +- We use templates for code and documentation to show and describe how a document + or code should look like, e.g., - `code_template.py` shows our coding style - `unit_test_template.py` shows how our unit tests look like - - `all.how_to_guide_template_doc.md` shows how a Diataxis how to guide should be - structured and look like (same for `explanation`, `tutorial`, `reference`) + - `all.how_to_guide_template_doc.md` shows how a Diataxis how to guide should + be structured and look like (same for `explanation`, `tutorial`, `reference`) -- The same template can have multiple applications for: +- The same templates can have multiple applications for: - Humans: - Understand how to write documentation 
and code - As boilerplate @@ -108,7 +108,45 @@ ## `ai_review.py` -./docs/code_guidelines/all.coding_style_guidelines.reference.md +- The rules for AI are saved in the file + ./docs/code_guidelines/all.coding_style_guidelines.reference.md +- This file has a special structure: + ```bash + > extract_headers_from_markdown.py -i ./docs/code_guidelines/all.coding_style_guidelines.reference.md --max_level 2 + - All Style Guide + - Summary + - General + - Spelling + - Python + - Naming + - Docstrings + - Comments + - Code Implementation + - Code Design + - Imports + - Type Annotations + - Functions + - Scripts + - Logging + - Misc + - Unit Tests + - Rules + - Notebooks + - General + - Plotting + - Jupytext + - Markdown + - General + - Headers + - Text + ``` + - The first level represents the target language (e.g. `General`, `Python`) + - The second level represents a rule topic (e.g., `Imports`, `Functions`) + - The third level represents instructions for an LLM vs Linter, since some + instructions: + - Are easier to enforce by an LLM + - While others should be enforced by the `linter` (even if they are temporary not + enforced by the `linter` but by LLM or by humans) ## `inject_todos.py` @@ -137,10 +175,10 @@ ## A reviewer workflow - This workflow can be used by the author of the code directly or by a reviewer + - Initially, reviewers use these tools as part of dogfooding of the workflows - The goal is to make these tools robust enough so that they can be used directly by the author and potentially integrated in the `linter` flow itself - - Initially, reviewers use these tools as part of dogfooding of the workflows - Go to the Git branch with the code to review diff --git a/helpers/hcache_simple.py b/helpers/hcache_simple.py index 9931b3a2a..c45645886 100644 --- a/helpers/hcache_simple.py +++ b/helpers/hcache_simple.py @@ -272,7 +272,7 @@ def _save_cache_dict_to_disk(func_name: str, data: Dict) -> None: pickle.dump(data, file) elif cache_type == "json": with 
open(file_name, "w", encoding="utf-8") as file: - json.dump(data, file) + json.dump(data, file, indent=4, sort_keys=True, ensure_ascii=False) else: raise ValueError(f"Invalid cache type '{cache_type}'") diff --git a/helpers/hllm.py b/helpers/hllm.py index 37cb2285f..06c77521d 100644 --- a/helpers/hllm.py +++ b/helpers/hllm.py @@ -489,7 +489,7 @@ def get_completion( :param model: model to use or empty string to use the default model :param report_progress: whether to report progress running the API call - :param cache_mode : "DISABLE_CACHE","REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL" + :param cache_mode: - "DISABLE_CACHE": No caching - "REFRESH_CACHE": Make API calls and save responses to cache - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache @@ -508,7 +508,6 @@ def get_completion( update_llm_cache = get_update_llm_cache() if update_llm_cache: cache_mode = "REFRESH_CACHE" - llm_client = LLMClient(model=model) llm_client.create_client() # Construct messages in OpenAI API request format. 
diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 98ff2fbf3..c7babad0f 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -13,3 +13,4 @@ from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/helpers/hmarkdown_coloring.py b/helpers/hmarkdown_coloring.py index ec989ecf9..4cfe8cdaf 100644 --- a/helpers/hmarkdown_coloring.py +++ b/helpers/hmarkdown_coloring.py @@ -6,6 +6,7 @@ import logging import re +from typing import List, Optional import helpers.hdbg as hdbg from helpers.hmarkdown_fenced_blocks import ( @@ -15,35 +16,55 @@ _LOG = logging.getLogger(__name__) -# TODO(gp): Add a decorator like in hprint to process both strings and lists -# of strings. # ############################################################################# # Colorize # ############################################################################# # Define colors and their LaTeX equivalents. 
-_COLORS = { +_MD_COLORS_LATEX_MAPPING = { "red": "red", "orange": "orange", - # "yellow": "yellow", - # "lime": "lime", - # + "yellow": "yellow", + "lime": "lime", "green": "darkgreen", "teal": "teal", "cyan": "cyan", "blue": "blue", - # "purple": "purple", + "purple": "purple", "violet": "violet", "magenta": "magenta", - # "pink": "pink", + "pink": "pink", "brown": "brown", "olive": "olive", "gray": "gray", "darkgray": "darkgray", - # "lightgray": "lightgray", - # "black": "black", - # "white": "white", + "lightgray": "lightgray", + "black": "black", + "white": "white", +} + + +_MD_COLORS = { + "red", + "orange", + # "yellow", + # "lime", + "green", + "teal", + "cyan", + "blue", + # "purple", + "violet", + "magenta", + # "pink", + "brown", + "olive", + "gray", + "darkgray", + # "lightgray", + # "black", + # "white", } @@ -60,11 +81,12 @@ def process_color_commands(in_line: str) -> str: :param in_line: input line to process :return: line with color commands transformed """ - for color, value in _COLORS.items(): - # This regex matches LaTeX color commands like \red{content}, \blue{content}, etc. + for md_color, latex_color in _MD_COLORS_LATEX_MAPPING.items(): + # This regex matches color commands like \red{content}, \blue{content}, + # etc. pattern = re.compile( rf""" - \\{color} # Match the color command (e.g., \red, \blue, etc.). + \\{md_color} # Match the color command (e.g., \red, \blue, etc.). \{{ # Match the opening curly brace. ([^}}]*) # Capture everything inside the curly braces. \}} # Match the closing curly brace. @@ -72,32 +94,36 @@ def process_color_commands(in_line: str) -> str: re.VERBOSE, ) - def _replacement(match: re.Match, value: str) -> str: + def _replacement(match: re.Match, latex_color: str) -> str: content = match.group(1) - # Check if content appears to be math expression. 
- is_math = any(c in content for c in "+-*/=<>{}[]()^_") - if is_math: - ret = rf"\textcolor{{{value}}}{{{content}}}" + # Check if content appears to be a math expression, otherwise wrap + # it in `\text{}`. + is_math_expr = any(c in content for c in "+-*/=<>{}[]()^_") + if is_math_expr: + ret = rf"\textcolor{{{latex_color}}}{{{content}}}" else: - ret = rf"\textcolor{{{value}}}{{\text{{{content}}}}}" + ret = rf"\textcolor{{{latex_color}}}{{\text{{{content}}}}}" return ret # Replace the color command with the LaTeX color command. - in_line = re.sub(pattern, lambda m: _replacement(m, value), in_line) + in_line = re.sub( + pattern, lambda m: _replacement(m, latex_color), in_line + ) return in_line -def has_color_command(line: str) -> bool: +def has_color_command(text: str) -> bool: """ Check if line contains any color commands. :param line: line to check :return: whether the line contains color commands """ - hdbg.dassert_isinstance(line, str) + hdbg.dassert_isinstance(text, str) # hdbg.dassert_not_in("\n", line) - for color in _COLORS.keys(): - # This regex matches LaTeX color commands like \red{content}, \blue{content}, etc. + for color in _MD_COLORS_LATEX_MAPPING.keys(): + # This regex matches LaTeX color commands like \red{content}, + # \blue{content}, etc. pattern = re.compile( rf""" \\{color} # Match the color command (e.g., \red, \blue, etc.). @@ -107,7 +133,7 @@ def has_color_command(line: str) -> bool: """, re.VERBOSE, ) - if re.search(pattern, line): + if re.search(pattern, text): return True return False @@ -115,17 +141,26 @@ def has_color_command(line: str) -> bool: # TODO(gp): -> List[str] # TODO(gp): Use hmarkdown.process_lines() and test it. def colorize_bullet_points_in_slide( - txt: str, *, use_abbreviations: bool = True + txt: str, + *, + use_abbreviations: bool = True, + interpolate_colors: bool = False, + all_md_colors: Optional[List[str]] = None, ) -> str: - """ + r""" Colorize bold text in a given string. 
     :param txt: text to colorize
     :param use_abbreviations: use abbreviations for the colors like
         `\red{foo}` instead of `\textcolor{red}{foo}`
+    :param interpolate_colors: interpolate the colors to use for the
+        bold items instead of using a fixed set of colors
+    :param all_md_colors: list of colors to use for the bold items
     :return: colored text
     """
     hdbg.dassert_isinstance(txt, str)
+    if all_md_colors is None:
+        all_md_colors = list(_MD_COLORS)
     # Replace fenced code blocks with tags.
     lines = txt.split("\n")
     lines, fence_map = replace_fenced_blocks_with_tags(lines)
@@ -144,12 +179,31 @@ def colorize_bullet_points_in_slide(
     # want to count `**bold**` as 1.
     hdbg.dassert_eq(tot_bold % 2, 0, "tot_bold=%s needs to be even", tot_bold)
     num_bolds = tot_bold // 2
+    # Use the colors in the order of the list of colors.
-    hdbg.dassert_lte(num_bolds, len(_COLORS))
-    # Sample num_bolds colors evenly spaced from the available colors
-    step = len(_COLORS) // num_bolds
-    colors = list(_COLORS.keys())[::step][:num_bolds]
+    def _interpolate_colors(num_bolds: int) -> List[str]:
+        """
+        Sample `num_bolds` colors evenly spaced from the available colors.
+        """
+        step = len(all_md_colors) // num_bolds
+        colors = list(all_md_colors)[::step][:num_bolds]
+        return colors
+
+    if interpolate_colors:
+        colors = _interpolate_colors(num_bolds)
+    else:
+        if num_bolds == 1:
+            colors = ["red"]
+        elif num_bolds == 2:
+            colors = ["red", "blue"]
+        elif num_bolds == 3:
+            colors = ["red", "green", "blue"]
+        elif num_bolds == 4:
+            colors = ["red", "green", "blue", "violet"]
+        else:
+            colors = _interpolate_colors(num_bolds)
     _LOG.debug("colors=%s", colors)
+    hdbg.dassert_lte(num_bolds, len(colors))
     # Colorize the bold items.
color_idx = 0 txt_out = [] @@ -163,11 +217,13 @@ def color_replacer(match: re.Match[str]) -> str: text = match.group(1) hdbg.dassert_lte(color_idx, len(colors)) color_to_use = colors[color_idx] + hdbg.dassert_in(color_to_use, _MD_COLORS_LATEX_MAPPING) + latex_color = _MD_COLORS_LATEX_MAPPING[color_to_use] color_idx += 1 if use_abbreviations: ret = f"**\\{color_to_use}{{{text}}}**" else: - ret = f"**\\textcolor{{{color_to_use}}}{{{text}}}**" + ret = f"**\\textcolor{{{latex_color}}}{{{text}}}**" return ret line = re.sub(r"\*\*([^*]+)\*\*", color_replacer, line) diff --git a/helpers/hmarkdown_rules.py b/helpers/hmarkdown_rules.py index 7117af985..1ecc1ed1e 100644 --- a/helpers/hmarkdown_rules.py +++ b/helpers/hmarkdown_rules.py @@ -286,6 +286,7 @@ def extract_rules( return rule_sections +# TODO(gp): This seems private? def parse_rules_from_txt(txt: str) -> List[str]: """ Parse rules from a chunk of markdown text. @@ -333,25 +334,26 @@ def parse_rules_from_txt(txt: str) -> List[str]: return bullet_points -def extract_rules_from_section(txt: str, line_number: int) -> List[str]: +def extract_rules_from_section(txt: str, start_line_number: int) -> List[str]: """ Extract rules from a section of a markdown file. :param txt: markdown text to extract the rules from - :param line_number: line number of the section to start extracting + :param start_line_number: line number of the section to start extracting the rules from :return: extracted rules """ # Find the line number of the next header. - i = line_number + end_line_number = start_line_number while True: - hdbg.dassert_lt(i, len(txt)) - line = txt[i] + hdbg.dassert_lt(end_line_number, len(txt)) + line = txt[end_line_number] if line.startswith("#"): break - i += 1 + end_line_number += 1 + _LOG.debug("end_line_number=%s", end_line_number) # Parse the markdown text into a list of bullet points. 
- bullet_points = parse_rules_from_txt(txt) + bullet_points = parse_rules_from_txt(txt[start_line_number:end_line_number]) # Extract the rules from the bullet points. rules = [] for bullet_point in bullet_points: diff --git a/helpers/hmarkdown_slides.py b/helpers/hmarkdown_slides.py index 2121ff736..df3e8074d 100644 --- a/helpers/hmarkdown_slides.py +++ b/helpers/hmarkdown_slides.py @@ -57,7 +57,7 @@ def process_slides(txt: str, transform: Callable[[List[str]], List[str]]) -> str # 2) Process slide. if _TRACE: _LOG.debug(" -> %s", hprint.to_str("in_slide")) - if line.startswith("* "): + if line.startswith("* ") or line.startswith("#### "): _LOG.debug("### Found slide") # Found a slide or the end of the file. if slide_txt: diff --git a/helpers/hmarkdown_tables.py b/helpers/hmarkdown_tables.py new file mode 100644 index 000000000..c73a0716f --- /dev/null +++ b/helpers/hmarkdown_tables.py @@ -0,0 +1,120 @@ +""" +Import as: + +import helpers.hmarkdown_tables as hmarktab +""" + +import logging +from typing import Dict, List, Tuple + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +def replace_tables_with_tags( + lines: List[str], +) -> Tuple[List[str], Dict[str, str]]: + """ + Replace markdown tables with tag and return mapping from tags to the table. + + E.g., + ``` + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + More text after + ``` + is replaced with: + ``` + Some text before + + More text after + ``` + + :param lines: list of lines to process + :return: tuple containing: + - list of lines with the tables replaced by tags + - mapping from tags to the table text + """ + hdbg.dassert_isinstance(lines, list) + result = [] + table_map = {} + table_count = 0 + i = 0 + while i < len(lines): + line = lines[i].strip() + # Check if this line starts a table (contains |). + if "|" in line and line.strip(): + # Look ahead to see if next line is a separator. 
+            if i + 1 < len(lines):
+                next_line = lines[i + 1].strip()
+                # Check if next line is a table separator (contains --- and |).
+                if "|" in next_line and "-" in next_line:
+                    # Found a table, collect all table lines.
+                    table_lines = []
+                    # Add header line.
+                    table_lines.append(lines[i])
+                    i += 1
+                    # Add separator line.
+                    table_lines.append(lines[i])
+                    i += 1
+                    # Add data rows (continue while lines contain |).
+                    while (
+                        i < len(lines)
+                        and "|" in lines[i].strip()
+                        and lines[i].strip()
+                    ):
+                        table_lines.append(lines[i])
+                        i += 1
+                    # Store the table.
+                    table_count += 1
+                    table_text = "\n".join(table_lines)
+                    table_map[str(table_count)] = table_text
+                    result.append(f"<table{table_count}>")
+                    continue
+        # Not a table line, add as-is.
+        result.append(lines[i])
+        i += 1
+    return result, table_map
+
+
+def replace_tags_with_tables(
+    lines: List[str], table_map: Dict[str, str]
+) -> List[str]:
+    """
+    Replace <table> tags with markdown tables.
+
+    :param lines: list of lines to process
+    :param table_map: mapping from tags to table text
+    :return: list of lines with tags replaced by tables
+    """
+    hdbg.dassert_isinstance(lines, list)
+    hdbg.dassert_isinstance(table_map, dict)
+    result = []
+    table_map_copy = table_map.copy()
+
+    for line in lines:
+        if line.startswith("<table"):
+            # Extract table number from tag like <table1>.
+            tag_match = line[6:-1]  # Remove '<table' and '>'
+            hdbg.dassert_in(
+                tag_match, table_map_copy, f"Found unmatched tag {tag_match}"
+            )
+            # Split table text into lines and add them.
+            table_text = table_map_copy[tag_match]
+            table_lines = table_text.split("\n")
+            result.extend(table_lines)
+            # Remove used tag from map.
+            del table_map_copy[tag_match]
+        else:
+            result.append(line)
+    # Ensure all tags were used.
+    hdbg.dassert_eq(
+        len(table_map_copy),
+        0,
+        f"Found {len(table_map_copy)} unmatched tags: {list(table_map_copy.keys())}",
+    )
+    return result
diff --git a/helpers/hmarkdown_toc.py b/helpers/hmarkdown_toc.py
new file mode 100644
index 000000000..a7600aceb
--- /dev/null
+++ b/helpers/hmarkdown_toc.py
@@ -0,0 +1,25 @@
+"""
+Import as:
+
+import helpers.hmarkdown_toc as hmarkdo
+"""
+
+import re
+
+
+def remove_table_of_contents(txt: str) -> str:
+    """
+    Remove the table of contents from the text of a markdown file.
+
+    The table of contents is stored between
+    ```
+    <!-- toc -->
+    ...
+    <!-- tocstop -->
+    ```
+
+    :param txt: Input markdown text
+    :return: Text with table of contents removed
+    """
+    txt = re.sub(r"<!-- toc -->.*?<!-- tocstop -->", "", txt, flags=re.DOTALL)
+    return txt
diff --git a/helpers/hmkdocs.py b/helpers/hmkdocs.py
index 4a7a2decf..fa90e67eb 100644
--- a/helpers/hmkdocs.py
+++ b/helpers/hmkdocs.py
@@ -7,25 +7,7 @@
 import re
 
 import helpers.hdbg as hdbg
-
-
-# TODO(gp): -> hamrkdown_toc.py
-def remove_table_of_contents(txt: str) -> str:
-    """
-    Remove the table of contents from the text of a markdown file.
-
-    The table of contents is stored between
-    ```
-    <!-- toc -->
-    ...
-    <!-- tocstop -->
-    ```
-
-    :param txt: Input markdown text
-    :return: Text with table of contents removed
-    """
-    txt = re.sub(r"<!-- toc -->.*?<!-- tocstop -->", "", txt, flags=re.DOTALL)
-    return txt
+import helpers.hmarkdown as hmarkdo
 
 
 # TODO(gp): -> hmarkdown_?.py
@@ -119,7 +101,7 @@ def preprocess_mkdocs_markdown(txt: str) -> str:
     :return: Preprocessed markdown text
     """
     # Apply all preprocessing steps.
- txt = remove_table_of_contents(txt) + txt = hmarkdo.remove_table_of_contents(txt) txt = dedent_python_code_blocks(txt) txt = replace_indentation_with_four_spaces(txt) return txt diff --git a/helpers/hplayback.py b/helpers/hplayback.py index 859c5ec71..9d5a9e7d6 100644 --- a/helpers/hplayback.py +++ b/helpers/hplayback.py @@ -264,8 +264,12 @@ def _check_code(self, func_output: Any) -> None: self._append("expected = jsonpickle.decode(expected)", 2) if isinstance(func_output, (pd.DataFrame, pd.Series)): - self._append("actual = hpandas.df_to_str(actual, num_rows=None)", 2) - self._append("expected = hpandas.df_to_str(expected, num_rows=None)", 2) + self._append( + "actual = hpandas.df_to_str(actual, num_rows=None)", 2 + ) + self._append( + "expected = hpandas.df_to_str(expected, num_rows=None)", 2 + ) self._append("# Compare actual and expected output.", 2) self._append("self.assertEqual(actual, expected)", 2) else: diff --git a/helpers/hunit_test.py b/helpers/hunit_test.py index d57cf3068..4b8534ded 100644 --- a/helpers/hunit_test.py +++ b/helpers/hunit_test.py @@ -674,7 +674,9 @@ def assert_equal( values: Dict[str, str] = collections.OrderedDict() def _append(tag: str, actual: str, expected: str) -> None: - _LOG.debug("tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected) + _LOG.debug( + "tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected + ) hdbg.dassert_not_in(tag, values) values[tag] = (actual, expected) diff --git a/helpers/hunit_test_purification.py b/helpers/hunit_test_purification.py index 61014d8e6..fedaa8e12 100644 --- a/helpers/hunit_test_purification.py +++ b/helpers/hunit_test_purification.py @@ -300,7 +300,7 @@ def purify_line_number(self, txt: str) -> str: def purify_parquet_file_names(self, txt: str) -> str: """ - Replace UUIDs file names to `data.parquet` in the goldens. + Replace UUIDs file names to `data.parquet` in the golden outcomes. 
:param txt: input text containing parquet file names :return: text with standardized parquet file names @@ -342,18 +342,56 @@ def purify_helpers(self, txt: str) -> str: def purify_docker_image_name(self, txt: str) -> str: """ - Remove temporary docker image name that are function of their content. + Remove temporary docker image name. :param txt: input text containing docker image names :return: text with standardized docker image names """ - # In a command like: + # Purify command like: # > docker run --rm ... tmp.latex.edb567be .. + # > ... tmp.latex.aarch64.2f590c86.2f590c86 + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\. # tmp.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ txt = re.sub( - r"^(.*docker.*\s+tmp\.\S+\.)[a-z0-9]{8}(\s+.*)$", + pattern, r"\1xxxxxxxx\2", txt, - flags=re.MULTILINE, + flags=re.MULTILINE | re.VERBOSE, + ) + # Handle patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\.\S+\. # tmp.something.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + \. 
# Literal dot + [a-z0-9]{8} # Another 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ + txt = re.sub( + pattern, + r"\1xxxxxxxx\2", + txt, + flags=re.MULTILINE | re.VERBOSE, ) return txt diff --git a/helpers/test/test_hgit.py b/helpers/test/test_hgit.py index dbf83ad55..c328e629b 100644 --- a/helpers/test/test_hgit.py +++ b/helpers/test/test_hgit.py @@ -112,7 +112,11 @@ def test_group_hashes3(self) -> None: self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) def _helper_group_hashes( - self, head_hash: str, remh_hash: str, subm_hash: Optional[str], expected: str + self, + head_hash: str, + remh_hash: str, + subm_hash: Optional[str], + expected: str, ) -> None: actual = hgit._group_hashes(head_hash, remh_hash, subm_hash) self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/helpers/test/test_hmarkdown_bullets.py b/helpers/test/test_hmarkdown_bullets.py index 730a7ec98..0164d5980 100644 --- a/helpers/test/test_hmarkdown_bullets.py +++ b/helpers/test/test_hmarkdown_bullets.py @@ -534,7 +534,9 @@ def test1(self) -> None: _LOG.debug(hprint.to_str("line")) out.append(f"{i}:{line}") actual = "\n".join(out) - self.check_string(actual, dedent=True, remove_lead_trail_empty_lines=True) + self.check_string( + actual, dedent=True, remove_lead_trail_empty_lines=True + ) # ############################################################################# @@ -566,4 +568,6 @@ def test1(self) -> None: txt_in = hio.from_file(input_file_path) txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) actual = self.helper_process_code_block(txt_in) - self.check_string(actual, dedent=True, remove_lead_trail_empty_lines=True) + self.check_string( + actual, dedent=True, remove_lead_trail_empty_lines=True + ) diff --git a/helpers/test/test_hmarkdown_coloring.py b/helpers/test/test_hmarkdown_coloring.py index 22a5987ac..e2d6b75e8 100644 --- 
a/helpers/test/test_hmarkdown_coloring.py +++ b/helpers/test/test_hmarkdown_coloring.py @@ -1,9 +1,57 @@ -import logging - import helpers.hmarkdown as hmarkdo import helpers.hunit_test as hunitest -_LOG = logging.getLogger(__name__) + +# ############################################################################# +# Test_process_color_commands1 +# ############################################################################# + + +class Test_process_color_commands1(hunitest.TestCase): + def test_text_content1(self) -> None: + """ + Test with plain text content. + """ + txt_in = r"\red{Hello world}" + expected = r"\textcolor{red}{\text{Hello world}}" + actual = hmarkdo.process_color_commands(txt_in) + self.assert_equal(actual, expected) + + def test_math_content1(self) -> None: + """ + Test color command with mathematical content. + """ + txt_in = r"\blue{x + y = z}" + expected = r"\textcolor{blue}{x + y = z}" + actual = hmarkdo.process_color_commands(txt_in) + self.assert_equal(actual, expected) + + def test_multiple_colors1(self) -> None: + """ + Test multiple color commands in the same line. + """ + txt_in = r"The \red{quick} \blue{fox} \green{jumps}" + expected = r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}" + actual = hmarkdo.process_color_commands(txt_in) + self.assert_equal(actual, expected) + + def test_mixed_content1(self) -> None: + """ + Test color commands with both text and math content. + """ + txt_in = r"\red{Result: x^2 + y^2}" + expected = r"\textcolor{red}{Result: x^2 + y^2}" + actual = hmarkdo.process_color_commands(txt_in) + self.assert_equal(actual, expected) + + def test_nested_braces1(self) -> None: + """ + Test color command with nested braces. 
+ """ + txt_in = r"\blue{f(x) = {x + 1}}" + expected = r"\textcolor{blue}{f(x) = {x + 1}}" + actual = hmarkdo.process_color_commands(txt_in) + self.assert_equal(actual, expected) # ############################################################################# @@ -13,39 +61,81 @@ class Test_colorize_bullet_points_in_slide1(hunitest.TestCase): def test1(self) -> None: - text = """ - * Machine Learning Flow + # Prepare inputs. + text = r""" + - **VC Theory** + - Measures model - ::: columns - :::: {.column width=90%} - - Question - - E.g., "How can we predict house prices?" - - Input data - - E.g., historical data of house sales + - **Bias-Variance Decomposition** + - Prediction error + - **Bias** + - **Variance** - - _"If I were given one hour to save the planet, I would spend 59 minutes - defining the problem and one minute resolving it"_ (Albert Einstein) + - **Computation Complexity** + - Balances model + - Related to + - E.g., Minimum - - **Not all phases are equally important!** - - Question $>$ Data $>$ Features $>$ Algorithm - - Clarity of the question impacts project success - - Quality and relevance of data are crucial for performance - - Proper feature selection simplifies the model and improves accuracy - - Algorithm is often less important (contrary to popular belief!) - :::: - :::: {.column width=5%} + - **Bayesian Approach** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model - ```graphviz[height=90%] - digraph BayesianFlow { - rankdir=TD; - splines=true; - ... - } - ``` - :::: - ::: + - **Problem in ML Theory:** + - Assumptions may not align with practical problems + """ + # Run function. 
+ all_md_colors = [ + "red", + "orange", + "yellow", + "lime", + "green", + "teal", + "cyan", + "blue", + "purple", + "violet", + "magenta", + "pink", + "brown", + "olive", + "gray", + "darkgray", + "lightgray", + "black", + "white", + ] + + actual = hmarkdo.colorize_bullet_points_in_slide( + text, all_md_colors=all_md_colors + ) + # Check output. + expected = r""" + - **\red{VC Theory}** + - Measures model + + - **\orange{Bias-Variance Decomposition}** + - Prediction error + - **\yellow{Bias}** + - **\lime{Variance}** + + - **\green{Computation Complexity}** + - Balances model + - Related to + - E.g., Minimum + + - **\teal{Bayesian Approach}** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model + + - **\cyan{Problem in ML Theory:}** + - Assumptions may not align with practical problems """ - expected = """ + self.assert_equal(actual, expected) + + def test2(self) -> None: + # Prepare inputs. + text = r""" * Machine Learning Flow ::: columns @@ -58,7 +148,7 @@ def test1(self) -> None: - _"If I were given one hour to save the planet, I would spend 59 minutes defining the problem and one minute resolving it"_ (Albert Einstein) - - **\\red{Not all phases are equally important!}** + - **Not all phases are equally important!** - Question $>$ Data $>$ Features $>$ Algorithm - Clarity of the question impacts project success - Quality and relevance of data are crucial for performance @@ -77,6 +167,28 @@ def test1(self) -> None: :::: ::: """ + # Run function. actual = hmarkdo.colorize_bullet_points_in_slide(text) # Check output. 
+ expected = r""" + - **\red{VC Theory}** + - Measures model + + - **\yellow{Bias-Variance Decomposition}** + - Prediction error + - **\green{Bias}** + - **\cyan{Variance}** + + - **\purple{Computation Complexity}** + - Balances model + - Related to + - E.g., Minimum + + - **\magenta{Bayesian Approach}** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model + + - **\brown{Problem in ML Theory:}** + - Assumptions may not align with practical problems + """ self.assert_equal(actual, expected) diff --git a/helpers/test/test_hmarkdown_headers.py b/helpers/test/test_hmarkdown_headers.py index 0882f6508..79b7e0bc8 100644 --- a/helpers/test/test_hmarkdown_headers.py +++ b/helpers/test/test_hmarkdown_headers.py @@ -596,7 +596,9 @@ def test_single_header(self) -> None: # Call function. actual = hmarkdo.extract_headers_from_markdown(content, max_level=3) # Check output. - expected = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" + expected = ( + r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" + ) self.assert_equal(str(actual), expected) def test_no_headers(self) -> None: diff --git a/helpers/test/test_hmarkdown_rules.py b/helpers/test/test_hmarkdown_rules.py index ea77183b0..c14b7cf34 100644 --- a/helpers/test/test_hmarkdown_rules.py +++ b/helpers/test/test_hmarkdown_rules.py @@ -206,13 +206,14 @@ def test4(self) -> None: class Test_parse_rules_from_txt1(hunitest.TestCase): - def helper(self, text: str, expected: str) -> None: + def helper(self, text: str, expected: List[str]) -> None: # Prepare inputs. text = hprint.dedent(text) # Call function. actual = hmarkdo.parse_rules_from_txt(text) # Check output. 
- actual = "\n".join(actual) + actual = str(actual) + expected = str(expected) self.assert_equal(actual, expected, dedent=True) def test_basic_list1(self) -> None: @@ -224,11 +225,7 @@ def test_basic_list1(self) -> None: - Item 2 - Item 3 """ - expected = """ - - Item 1 - - Item 2 - - Item 3 - """ + expected = ["- Item 1", "- Item 2", "- Item 3"] self.helper(text, expected) def test_nested_list1(self) -> None: @@ -242,13 +239,11 @@ def test_nested_list1(self) -> None: - Sub-item 2.2 - Item 3 """ - expected = """ - - Item 1 - - Item 2 - - Sub-item 2.1 - - Sub-item 2.2 - - Item 3 - """ + expected = [ + "- Item 1", + "- Item 2\n - Sub-item 2.1\n - Sub-item 2.2", + "- Item 3", + ] self.helper(text, expected) def test_empty_list1(self) -> None: @@ -256,7 +251,7 @@ def test_empty_list1(self) -> None: Test handling empty input. """ text = "" - expected = "" + expected = [] self.helper(text, expected) @@ -304,7 +299,9 @@ def test_get_header_list1(self) -> None: """ self.assert_equal(actual, expected, dedent=True) - def helper_extract_rules(self, selection_rules: List[str], expected: str) -> None: + def helper_extract_rules( + self, selection_rules: List[str], expected: str + ) -> None: """ Helper function to test extracting rules from a markdown file. 
""" diff --git a/helpers/test/test_hmarkdown_tables.py b/helpers/test/test_hmarkdown_tables.py new file mode 100644 index 000000000..f651aa3bf --- /dev/null +++ b/helpers/test/test_hmarkdown_tables.py @@ -0,0 +1,196 @@ +import logging +import pprint +from typing import Dict, List + +import helpers.hmarkdown_tables as hmartabl +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_tables_with_tags1 +# ############################################################################# + + +class Test_replace_tables_with_tags1(hunitest.TestCase): + def helper( + self, text: str, expected_lines: List[str], expected_map: Dict[str, str] + ) -> None: + """ + Test replacing markdown tables with tags. + """ + lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) + lines = lines.split("\n") + # Call function. + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + # Check output. + table_map_as_str = pprint.pformat(table_map) + expected_map_as_str = pprint.pformat(expected_map) + self.assert_equal(table_map_as_str, expected_map_as_str) + # + actual_lines = "\n".join(actual_lines) + expected_lines = hprint.dedent( + expected_lines, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual_lines, expected_lines) + + def helper_round_trip(self, text: str) -> None: + """ + Test the round trip. + """ + # Do the round trip. + lines = text.split("\n") + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + act_text = hmartabl.replace_tags_with_tables(actual_lines, table_map) + # Check output. + act_text = "\n".join(act_text) + self.assert_equal(act_text, text) + + def test1(self) -> None: + """ + Test replacing simple markdown table with tags. + """ + # Prepare inputs. 
+ text = """ + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + Text between tables + | Name | Age | City | + |------|-----|------| + | John | 25 | NYC | + Some text after + """ + # Prepare outputs. + expected_lines = """ + Some text before + + Text between tables + + Some text after + """ + # Check table map. + expected_map = { + "1": "| Column 1 | Column 2 |\n|----------|----------|\n| Value 1 | Value 2 |\n| Value 3 | Value 4 |", + "2": "| Name | Age | City |\n|------|-----|------|\n| John | 25 | NYC |", + } + self.helper(text, expected_lines, expected_map) + + def test2(self) -> None: + """ + Test table with alignment indicators. + """ + text = """ + | Left | Center | Right | + |:-----|:------:|------:| + | L1 | C1 | R1 | + | L2 | C2 | R2 | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Left | Center | Right |\n|:-----|:------:|------:|\n| L1 | C1 | R1 |\n| L2 | C2 | R2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test3(self) -> None: + """ + Test table with minimal structure. + """ + text = """ + Before + | A | B | + |---|---| + | 1 | 2 | + After + """ + expected_lines = """ + Before + + After + """ + expected_map = {"1": "| A | B |\n|---|---|\n| 1 | 2 |"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test4(self) -> None: + """ + Test table with empty cells. + """ + text = """ + | Col1 | Col2 | Col3 | + |------|------|------| + | A | | C | + | | B | | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Col1 | Col2 | Col3 |\n|------|------|------|\n| A | | C |\n| | B | |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test5(self) -> None: + """ + Test multiple tables with different column counts. 
+ """ + text = """ + First table: + | A | B | + |---|---| + | 1 | 2 | + + Second table: + | X | Y | Z | W | + |---|---|---|---| + | a | b | c | d | + | e | f | g | h | + """ + expected_lines = """ + First table: + + + Second table: + + """ + expected_map = { + "1": "| A | B |\n|---|---|\n| 1 | 2 |", + "2": "| X | Y | Z | W |\n|---|---|---|---|\n| a | b | c | d |\n| e | f | g | h |", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test6(self) -> None: + """ + Test table with indentation. + """ + text = """ + Outside + | Col1 | Col2 | + |------|------| + | Val1 | Val2 | + End + """ + expected_lines = """ + Outside + + End + """ + expected_map = { + "1": " | Col1 | Col2 |\n |------|------|\n | Val1 | Val2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) diff --git a/helpers/test/test_hmarkdown_toc.py b/helpers/test/test_hmarkdown_toc.py new file mode 100644 index 000000000..6a2f4a54e --- /dev/null +++ b/helpers/test/test_hmarkdown_toc.py @@ -0,0 +1,101 @@ +import logging + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) +# ############################################################################# +# Test_remove_table_of_contents1 +# ############################################################################# + + +class Test_remove_table_of_contents1(hunitest.TestCase): + def test_with_toc(self) -> None: + """ + Test removing table of contents from markdown text. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + + - [Section 1](#section-1) + - [Section 2](#section-2) + + + ## Section 1 + + Content of section 1. + """ + expected = """ + # Introduction + + This is an introduction. + + + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. 
+ expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_without_toc(self) -> None: + """ + Test text without table of contents remains unchanged. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + self.assert_equal(actual, text) + + def test_multiline_toc(self) -> None: + """ + Test removing multi-line table of contents. + """ + # Prepare inputs. + text = """ + # Introduction + + + - [Section 1](#section-1) + - [Subsection 1.1](#subsection-11) + - [Section 2](#section-2) + - [Subsection 2.1](#subsection-21) + - [Subsection 2.2](#subsection-22) + + + ## Section 1 + """ + expected = """ + # Introduction + + + + ## Section 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) diff --git a/helpers/test/test_hmkdocs.py b/helpers/test/test_hmkdocs.py index 4390f8f7a..16f0f097a 100644 --- a/helpers/test/test_hmkdocs.py +++ b/helpers/test/test_hmkdocs.py @@ -7,102 +7,6 @@ _LOG = logging.getLogger(__name__) -# ############################################################################# -# Test_remove_table_of_contents1 -# ############################################################################# - - -class Test_remove_table_of_contents1(hunitest.TestCase): - def test_with_toc(self) -> None: - """ - Test removing table of contents from markdown text. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - - - [Section 1](#section-1) - - [Section 2](#section-2) - - - ## Section 1 - - Content of section 1. - """ - expected = """ - # Introduction - - This is an introduction. - - - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. 
- actual = hmkdocs.remove_table_of_contents(text) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_without_toc(self) -> None: - """ - Test text without table of contents remains unchanged. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.remove_table_of_contents(text) - # Check output. - self.assert_equal(actual, text) - - def test_multiline_toc(self) -> None: - """ - Test removing multi-line table of contents. - """ - # Prepare inputs. - text = """ - # Introduction - - - - [Section 1](#section-1) - - [Subsection 1.1](#subsection-11) - - [Section 2](#section-2) - - [Subsection 2.1](#subsection-21) - - [Subsection 2.2](#subsection-22) - - - ## Section 1 - """ - expected = """ - # Introduction - - - - ## Section 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.remove_table_of_contents(text) - # Check output. 
- expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - # ############################################################################# # Test_dedent_python_code_blocks1 # ############################################################################# diff --git a/helpers/test/test_hparquet.py b/helpers/test/test_hparquet.py index 49e8e14af..8fe34e678 100644 --- a/helpers/test/test_hparquet.py +++ b/helpers/test/test_hparquet.py @@ -986,7 +986,9 @@ def test_to_partitioned_dataset(self) -> None: dummy_value_1=3 dummy_value_1=3/dummy_value_2=C dummy_value_1=3/dummy_value_2=C/data.parquet""" - self.assert_equal(dir_signature, expected, purify_text=True, fuzzy_match=True) + self.assert_equal( + dir_signature, expected, purify_text=True, fuzzy_match=True + ) # include_file_content = True dir_signature = hunitest.get_dir_signature( diff --git a/helpers/test/test_hunit_test.py b/helpers/test/test_hunit_test.py index ed4a43537..2da2686e6 100644 --- a/helpers/test/test_hunit_test.py +++ b/helpers/test/test_hunit_test.py @@ -873,7 +873,9 @@ def _check_df_helper( golden_outcomes.to_csv(file_name) try: outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=abort_on_error, err_threshold=err_threshold + actual, + abort_on_error=abort_on_error, + err_threshold=err_threshold, ) finally: # Clean up. 
@@ -892,7 +894,9 @@ def test1(self) -> None: actual = "hello" # action_on_missing_golden = "assert" action_on_missing_golden = "update" - self.check_string(actual, action_on_missing_golden=action_on_missing_golden) + self.check_string( + actual, action_on_missing_golden=action_on_missing_golden + ) def test2(self) -> None: actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) diff --git a/helpers/test/test_hunit_test_purification.py b/helpers/test/test_hunit_test_purification.py index 53520cd9a..41efb42b5 100644 --- a/helpers/test/test_hunit_test_purification.py +++ b/helpers/test/test_hunit_test_purification.py @@ -8,7 +8,7 @@ import logging import os import unittest.mock as umock -from typing import List +from typing import Any, List import pytest @@ -116,28 +116,29 @@ def test9(self) -> None: # TODO(gp): We should remove the current path. # pylint: disable=line-too-long txt = r""" -************* Module input [pylint] -$SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] -$SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] -$SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] -cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] -""" + ************* Module input [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ + txt = hprint.dedent(txt) txt = txt.replace("$SUPER_MODULE", super_module_path) expected = r""" -************* Module input [pylint] -$GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] -$GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] -$GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] -cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] -dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] -""" + ************* Module input [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ # pylint: enable=line-too-long - self.check_helper(txt, expected) + self.check_helper(txt, expected, dedent=True) def test10(self) -> None: """ @@ -947,6 +948,20 @@ def test1(self) -> None: actual = text_purifier.purify_docker_image_name(txt) self.assert_equal(actual, expected, fuzzy_match=True) + def test2(self) -> None: + """ + Test patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. 
+ """ + txt = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.2f590c86.2f590c86 pdflatex -output-directory + """ + expected = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_docker_image_name(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + # ############################################################################# # Test_purify_line_number1 diff --git a/helpers/test/test_lib_tasks_docker_release.py b/helpers/test/test_lib_tasks_docker_release.py index cddb2e7d7..58c8a5d9d 100644 --- a/helpers/test/test_lib_tasks_docker_release.py +++ b/helpers/test/test_lib_tasks_docker_release.py @@ -7,6 +7,7 @@ import moto import pytest +import helpers.hgit as hgit import helpers.hunit_test as hunitest import helpers.lib_tasks_docker as hlitadoc import helpers.lib_tasks_docker_release as hltadore @@ -319,6 +320,11 @@ def test_multi_arch_prod_image1(self) -> None: """ self._check_docker_command_output(expected, self.mock_run.call_args_list) + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + # TODO(gp): Is the assertion too strict? + reason="Needs to run insde a super module", + ) def test_candidate_tag1(self) -> None: """ Test building with candidate mode using tag. diff --git a/helpers/test/test_repo_config_amp.py b/helpers/test/test_repo_config_amp.py index 17bedef39..ced80844b 100644 --- a/helpers/test/test_repo_config_amp.py +++ b/helpers/test/test_repo_config_amp.py @@ -225,7 +225,9 @@ def test_amp_ci(self) -> None: # We ignore the AWS vars, since GH Actions does some replacement to mask # the env vars coming from secrets. 
skip_secrets_vars = True - hunteuti.check_env_to_str(self, expected, skip_secrets_vars=skip_secrets_vars) + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) @pytest.mark.skipif( not hrecouti.get_repo_config().get_name() == "//cmamp", @@ -277,4 +279,6 @@ def test_cmamp_ci(self) -> None: # We ignore the AWS vars, since GH Actions does some replacement to mask # the env vars coming from secrets. skip_secrets_vars = True - hunteuti.check_env_to_str(self, expected, skip_secrets_vars=skip_secrets_vars) + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) diff --git a/helpers/unit_test_template.py b/helpers/unit_test_template.py deleted file mode 100644 index 1b769e1b4..000000000 --- a/helpers/unit_test_template.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Import as: - -import helpers.unit_test_template as hunteske -""" - -import logging - -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_Example -# ############################################################################# - - -class Test_Example(hunitest.TestCase): - def test_example1(self) -> None: - pass diff --git a/linters/test/test_amp_check_import.py b/linters/test/test_amp_check_import.py index b1eb0f6e9..1cdd6d599 100644 --- a/linters/test/test_amp_check_import.py +++ b/linters/test/test_amp_check_import.py @@ -44,9 +44,13 @@ def test5(self) -> None: expected = "" self._helper_check_import(line, expected, file_name="test.py") - def _helper_check_import(self, line: str, expected: str, file_name: str) -> None: + def _helper_check_import( + self, line: str, expected: str, file_name: str + ) -> None: file_name = file_name or "test.py" line_num = 1 - expected = f"{file_name}:{line_num}: {expected}" if expected else expected + expected = ( + f"{file_name}:{line_num}: {expected}" if expected else expected + ) msg = 
lamchimp._check_import(file_name, line_num, line) self.assertEqual(expected, msg) diff --git a/linters/test/test_amp_check_shebang.py b/linters/test/test_amp_check_shebang.py index 181a5e3e6..4ea695a18 100644 --- a/linters/test/test_amp_check_shebang.py +++ b/linters/test/test_amp_check_shebang.py @@ -1,6 +1,7 @@ import pytest import helpers.hunit_test as hunitest +import helpers.hprint as hprint import linters.amp_check_shebang as lamchshe @@ -11,10 +12,12 @@ def test1(self) -> None: Executable with wrong shebang: error. """ file_name = "exec.py" - txt = """#!/bin/bash -hello -world -""" + txt = """ + #!/bin/bash + hello + world + """ + txt = hprint.dedent(txt) is_executable = True expected = "exec.py:1: any executable needs to start with a shebang '#!/usr/bin/env python'" self._helper_check_shebang(file_name, txt, is_executable, expected) @@ -24,10 +27,12 @@ def test2(self) -> None: Executable with the correct shebang: correct. """ file_name = "exec.py" - txt = """#!/usr/bin/env python -hello -world -""" + txt = """ + #!/usr/bin/env python + hello + world + """ + txt = hprint.dedent(txt) is_executable = True expected = "" self._helper_check_shebang(file_name, txt, is_executable, expected) @@ -37,10 +42,12 @@ def test3(self) -> None: Non executable with a shebang: error. """ file_name = "exec.py" - txt = """#!/usr/bin/env python -hello -world -""" + txt = """ + #!/usr/bin/env python + hello + world + """ + txt = hprint.dedent(txt) is_executable = False expected = "exec.py:1: a non-executable can't start with a shebang." self._helper_check_shebang(file_name, txt, is_executable, expected) @@ -50,11 +57,13 @@ def test4(self) -> None: Library without a shebang: correct. 
""" file_name = "lib.py" - txt = '''""" -Import as: + txt = ''' + """ + Import as: -import _setenv_lib as selib -''' + import _setenv_lib as selib + ''' + txt = hprint.dedent(txt) is_executable = False expected = "" self._helper_check_shebang(file_name, txt, is_executable, expected) diff --git a/linters/test/test_amp_class_method_order.py b/linters/test/test_amp_class_method_order.py index e65f7dc14..75f4ae13e 100644 --- a/linters/test/test_amp_class_method_order.py +++ b/linters/test/test_amp_class_method_order.py @@ -1,3 +1,4 @@ +import helpers.hprint as hprint import helpers.hunit_test as hunitest import linters.amp_class_method_order as laclmeor @@ -8,42 +9,42 @@ def test_1(self) -> None: Test methods in incorrect order are re-ordered. """ original = """ -class Test: - def test1(): - pass + class Test: + def test1(): + pass - def __init__(): - pass + def __init__(): + pass - def _test2(): - pass + def _test2(): + pass - def test3(): - pass + def test3(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass -""" + """ expected = """ -class Test: + class Test: - def __init__(): - pass + def __init__(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass - def test1(): - pass + def test1(): + pass - def test3(): - pass + def test3(): + pass - def _test2(): - pass + def _test2(): + pass -""" + """ self._helper(original, expected) def test_2(self) -> None: @@ -51,22 +52,22 @@ def test_2(self) -> None: Test methods in correct order aren't re-ordered. 
""" original = expected = """ -class Test: - def __init__(): - pass + class Test: + def __init__(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass - def test1(): - pass + def test1(): + pass - def test3(): - pass + def test3(): + pass - def _test2(): - pass -""" + def _test2(): + pass + """ self._helper(original, expected) def test_3(self) -> None: @@ -76,25 +77,25 @@ def test_3(self) -> None: """ # pylint: disable=line-too-long original = """ -class Test: - def test1(): - # This is a test comment - pass - - def __init__(): - # Another comment - pass -""" + class Test: + def test1(): + # This is a test comment + pass + + def __init__(): + # Another comment + pass + """ expected = """ -class Test: - def __init__(): - # Another comment - pass - - def test1(): - # This is a test comment - pass -""" + class Test: + def __init__(): + # Another comment + pass + + def test1(): + # This is a test comment + pass + """ self._helper(original, expected) def test_4(self) -> None: @@ -103,25 +104,25 @@ def test_4(self) -> None: losing information. """ original = ''' -class Test: - def test1(): - """This is a test docstring""" - pass - - def __init__(): - """Another docstring""" - pass -''' + class Test: + def test1(): + """This is a test docstring""" + pass + + def __init__(): + """Another docstring""" + pass + ''' expected = ''' -class Test: - def __init__(): - """Another docstring""" - pass - - def test1(): - """This is a test docstring""" - pass -''' + class Test: + def __init__(): + """Another docstring""" + pass + + def test1(): + """This is a test docstring""" + pass + ''' self._helper(original, expected) def test_5(self) -> None: @@ -129,52 +130,52 @@ def test_5(self) -> None: Test that static and regular methods are re-ordered correctly. 
""" original = """ -class Test: - @staticmethod - def test1(): - pass + class Test: + @staticmethod + def test1(): + pass - def __init__(): - pass + def __init__(): + pass - @staticmethod - def _test2(): - pass + @staticmethod + def _test2(): + pass - def test3(): - pass + def test3(): + pass - def _test4(): - pass + def _test4(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass -""" + """ expected = """ -class Test: + class Test: - def __init__(): - pass + def __init__(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass - @staticmethod - def test1(): - pass + @staticmethod + def test1(): + pass - def test3(): - pass + def test3(): + pass - @staticmethod - def _test2(): - pass + @staticmethod + def _test2(): + pass - def _test4(): - pass + def _test4(): + pass -""" + """ self._helper(original, expected) def test_6(self) -> None: @@ -182,50 +183,51 @@ def test_6(self) -> None: Test re-ordering with different decorators. """ original = """ -@pytest.mark.skip("ABC") -class Test: + @pytest.mark.skip("ABC") + class Test: - def __init__(): - pass + def __init__(): + pass - @pytest.mark.skip("DEF") - def test1(): - pass + @pytest.mark.skip("DEF") + def test1(): + pass - @pytest.mark.slow() - @umock.patch.object(imvcdeexcl.hdateti, "get_current_time") - def _test2(): - pass + @pytest.mark.slow() + @umock.patch.object(imvcdeexcl.hdateti, "get_current_time") + def _test2(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass -""" + """ expected = """ -@pytest.mark.skip("ABC") -class Test: + @pytest.mark.skip("ABC") + class Test: - def __init__(): - pass + def __init__(): + pass - def __magic_test__(): - pass + def __magic_test__(): + pass - @pytest.mark.skip("DEF") - def test1(): - pass + @pytest.mark.skip("DEF") + def test1(): + pass - @pytest.mark.slow() - @umock.patch.object(imvcdeexcl.hdateti, "get_current_time") - def _test2(): - pass + @pytest.mark.slow() + @umock.patch.object(imvcdeexcl.hdateti, 
"get_current_time") + def _test2(): + pass -""" + """ self._helper(original, expected) def _helper(self, txt: str, expected: str) -> None: + txt = hprint.dedent(txt) actual = laclmeor.order_methods(txt) # Remove empty lines since they can create issues. actual = hunitest.filter_text(r"^\s*$", actual) expected = hunitest.filter_text(r"^\s*$", expected) - self.assert_equal(actual, expected) + self.assert_equal(actual, expected, dedent=True) diff --git a/linters/test/test_amp_fix_comment.py b/linters/test/test_amp_fix_comment.py index 5dc1b52e1..22c884d00 100644 --- a/linters/test/test_amp_fix_comment.py +++ b/linters/test/test_amp_fix_comment.py @@ -44,8 +44,7 @@ def test3(self) -> None: self.assertEqual(expected, actual) @pytest.mark.skip( - reason="""Inline comments are not allowed, as they are hard to maintain - """ + reason="Inline comments are not allowed, as they are hard to maintain" ) def test4(self) -> None: """ diff --git a/unit_test_template.py b/unit_test_template.py index 4fc6ae256..922d642c2 100644 --- a/unit_test_template.py +++ b/unit_test_template.py @@ -12,6 +12,16 @@ class Test_format_compressed_markdown1(hunitest.TestCase): + def helper(self, actual: str, expected: str) -> None: + # Prepare inputs. + actual = hprint.dedent(actual) + actual = [line for line in actual.split("\n") if line != ""] + actual = "\n".join(actual) + # Prepare outputs. + expected = hprint.dedent(expected) + # Check output. + self.assert_equal(actual, expected) + def test1(self) -> None: # Prepare inputs. # ... @@ -25,35 +35,33 @@ def test2(self) -> None: """ Test basic case with single first level bullet. """ + # Prepare inputs. text = """ Some text - First bullet More text""" + # Prepare outputs. expected = """ Some text - First bullet More text""" - self._format_and_compare_markdown(text, expected) + # Check. + self.helper(text, expected) def test3(self) -> None: """ Test multiple first level bullets. """ + # Prepare inputs. 
text = """ - First bullet - Second bullet - Third bullet""" + # Prepare outputs. expected = """ - First bullet - Second bullet - Third bullet""" - self._format_and_compare_markdown(text, expected) - - def _format_and_compare_markdown(self, actual: str, expected: str) -> None: - actual = hprint.dedent(actual) - actual = [line for line in actual.split("\n") if line != ""] - actual = "\n".join(actual) - expected = hprint.dedent(expected) - # - self.assert_equal(actual, expected) + # Check. + self.helper(text, expected) From 2e3ace7d6b6486e5102d5078866a9fd47b53614e Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 24 Jul 2025 08:08:03 -0400 Subject: [PATCH 2/3] Update --- .../output/test.txt | 7 +++ .../test/test_preprocess_notes.py | 58 +++++++++---------- 2 files changed, 36 insertions(+), 29 deletions(-) create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_end_to_end1.test_run_all2/output/test.txt diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_end_to_end1.test_run_all2/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_end_to_end1.test_run_all2/output/test.txt new file mode 100644 index 000000000..0985eb324 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_end_to_end1.test_run_all2/output/test.txt @@ -0,0 +1,7 @@ +--- +fontsize: 10pt +--- +\let\emph\textit +\let\uline\underline +\let\ul\underline +/app/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_preprocess_notes_end_to_end1.test_run_all2/input/input.txt \ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py index cdfd4d8e6..f6b370e66 100644 --- a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py +++ b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py @@ -31,48 +31,48 @@ class 
Test_process_question1(hunitest.TestCase): the library function directly. """ + def helper(self, txt_in: str, do_continue_exp: bool, expected: str) -> None: + do_continue, actual = dshdprno._process_question_to_markdown(txt_in) + self.assertEqual(do_continue, do_continue_exp) + self.assert_equal(actual, expected) + def test_process_question1(self) -> None: txt_in = "* Hope is not a strategy" do_continue_exp = True - exp = "- **Hope is not a strategy**" - self.helper(txt_in, do_continue_exp, exp) + expected = "- **Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, expected) def test_process_question2(self) -> None: txt_in = "** Hope is not a strategy" do_continue_exp = True - exp = "- **Hope is not a strategy**" - self.helper(txt_in, do_continue_exp, exp) + expected = "- **Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, expected) def test_process_question3(self) -> None: txt_in = "*: Hope is not a strategy" do_continue_exp = True - exp = "- **Hope is not a strategy**" - self.helper(txt_in, do_continue_exp, exp) + expected = "- **Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, expected) def test_process_question4(self) -> None: txt_in = "- Systems don't run themselves, they need to be run" do_continue_exp = False - exp = txt_in - self.helper(txt_in, do_continue_exp, exp) + expected = txt_in + self.helper(txt_in, do_continue_exp, expected) def test_process_question5(self) -> None: space = " " txt_in = "*" + space + "Hope is not a strategy" do_continue_exp = True - exp = "-" + space + "**Hope is not a strategy**" - self.helper(txt_in, do_continue_exp, exp) + expected = "-" + space + "**Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, expected) def test_process_question6(self) -> None: space = " " txt_in = "**" + space + "Hope is not a strategy" do_continue_exp = True - exp = "-" + " " * len(space) + "**Hope is not a strategy**" - self.helper(txt_in, do_continue_exp, exp) - - def helper(self, txt_in: str, 
do_continue_exp: bool, exp: str) -> None: - do_continue, act = dshdprno._process_question_to_markdown(txt_in) - self.assertEqual(do_continue, do_continue_exp) - self.assert_equal(actual, expected) + expected = "-" + " " * len(space) + "**Hope is not a strategy**" + self.helper(txt_in, do_continue_exp, expected) # ############################################################################# @@ -143,8 +143,8 @@ def _is_integer(value): print(v) ``` """ - exp = hprint.dedent(exp, remove_lead_trail_empty_lines_=True) - self.assert_equal(act, exp) + expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=True) + self.assert_equal(actual, expected) def test_run_all2(self) -> None: """ @@ -155,9 +155,9 @@ def test_run_all2(self) -> None: txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) # Run function. type_ = "slides" - act = dshdprno._transform_lines(txt_in, type_, is_qa=False) + actual = dshdprno._transform_lines(txt_in, type_, is_qa=False) # Check. - self.check_string(act) + self.check_string(actual) # ############################################################################# @@ -194,8 +194,8 @@ def helper(in_file: str, out_file: str, type_: str) -> str: # Run. hsystem.system(cmd_as_str) # Check. - act = hio.from_file(out_file) - return act # type: ignore + actual = hio.from_file(out_file) + return actual # type: ignore def test1(self) -> None: # Prepare inputs. @@ -203,9 +203,9 @@ def test1(self) -> None: out_file = os.path.join(self.get_scratch_space(), "output.txt") type_ = "pdf" # Run. - act = self.helper(in_file, out_file, type_) + actual = self.helper(in_file, out_file, type_) # Check. - self.check_string(act) + self.check_string(actual) def test2(self) -> None: # Prepare inputs. @@ -213,9 +213,9 @@ def test2(self) -> None: out_file = os.path.join(self.get_scratch_space(), "output.txt") type_ = "pdf" # Run. - act = self.helper(in_file, out_file, type_) + actual = self.helper(in_file, out_file, type_) # Check. 
- self.check_string(act) + self.check_string(actual) def test3(self) -> None: # Prepare inputs. @@ -223,6 +223,6 @@ def test3(self) -> None: out_file = os.path.join(self.get_scratch_space(), "output.txt") type_ = "pdf" # Run. - act = self.helper(in_file, out_file, type_) + actual = self.helper(in_file, out_file, type_) # Check. - self.check_string(act) + self.check_string(actual) From 712c382145ac89d9836424246e9be87d1ae75303 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 24 Jul 2025 08:20:38 -0400 Subject: [PATCH 3/3] Update --- helpers/hmarkdown.py | 1 + helpers/test/test_hmarkdown_coloring.py | 13 ++++++------- helpers/test/test_hunit_test_purification.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index c7babad0f..0d7f88441 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -13,4 +13,5 @@ from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_tables import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/helpers/test/test_hmarkdown_coloring.py b/helpers/test/test_hmarkdown_coloring.py index e2d6b75e8..9b2681adb 100644 --- a/helpers/test/test_hmarkdown_coloring.py +++ b/helpers/test/test_hmarkdown_coloring.py @@ -105,7 +105,6 @@ def test1(self) -> None: "black", "white", ] - actual = hmarkdo.colorize_bullet_points_in_slide( text, all_md_colors=all_md_colors ) @@ -114,21 +113,21 @@ def 
test1(self) -> None: - **\red{VC Theory}** - Measures model - - **\orange{Bias-Variance Decomposition}** + - **\yellow{Bias-Variance Decomposition}** - Prediction error - - **\yellow{Bias}** - - **\lime{Variance}** + - **\green{Bias}** + - **\cyan{Variance}** - - **\green{Computation Complexity}** + - **\purple{Computation Complexity}** - Balances model - Related to - E.g., Minimum - - **\teal{Bayesian Approach}** + - **\magenta{Bayesian Approach}** - Treats ML as probability - Combines prior knowledge with observed data to update belief about a model - - **\cyan{Problem in ML Theory:}** + - **\brown{Problem in ML Theory:}** - Assumptions may not align with practical problems """ self.assert_equal(actual, expected) diff --git a/helpers/test/test_hunit_test_purification.py b/helpers/test/test_hunit_test_purification.py index 41efb42b5..6488621a1 100644 --- a/helpers/test/test_hunit_test_purification.py +++ b/helpers/test/test_hunit_test_purification.py @@ -28,10 +28,10 @@ class Test_purify_text1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: + def check_helper(self, txt: str, expected: str, **kwargs: Any) -> None: text_purifier = huntepur.TextPurifier() actual = text_purifier.purify_txt_from_client(txt) - self.assert_equal(actual, expected) + self.assert_equal(actual, expected, **kwargs) def test1(self) -> None: txt = "amp/helpers/test/test_system_interaction.py"