diff --git a/.github/workflows/rhiza_marimo.yml b/.github/workflows/rhiza_marimo.yml
new file mode 100644
index 0000000..4109d6c
--- /dev/null
+++ b/.github/workflows/rhiza_marimo.yml
@@ -0,0 +1,100 @@
+# This file is part of the jebel-quant/rhiza repository
+# (https://github.com/jebel-quant/rhiza).
+#
+# Workflow: Marimo Notebooks
+#
+# Purpose: This workflow discovers and executes all Marimo notebooks in the
+#          repository. It builds a dynamic matrix to run each notebook in
+#          parallel, to surface errors early and keep notebooks reproducible.
+#
+# Trigger: This workflow runs on every push.
+#
+# Components:
+#   - πŸ”Ž Discover notebooks in the configured marimo folder (default: marimo)
+#   - πŸ§ͺ Run each notebook in parallel using a matrix strategy
+#   - βœ… Fail-fast disabled to report all failing notebooks
+
+name: "(RHIZA) MARIMO"
+
+permissions:
+  contents: read
+
+on:
+  push
+
+jobs:
+  # Build a matrix of notebooks to test
+  list-notebooks:
+    runs-on: ubuntu-latest
+    outputs:
+      notebook-list: ${{ steps.notebooks.outputs.matrix }}
+    steps:
+      # Check out the repository code
+      - uses: actions/checkout@v6.0.2
+
+      # Find all Python files in the marimo folder and create a matrix for parallel execution
+      - name: Find notebooks and build matrix
+        id: notebooks
+        run: |
+          # Extract MARIMO_FOLDER from the project configuration (via Makefile)
+          # shellcheck disable=SC2016 # Single quotes intentional - Make syntax, not shell expansion
+          NOTEBOOK_DIR=$(make -s -f Makefile -f - <<< 'print: ; @echo $(or $(MARIMO_FOLDER),marimo)' print)
+
+          echo "Searching notebooks in: $NOTEBOOK_DIR"
+          # Check if directory exists
+          if [ ! -d "$NOTEBOOK_DIR" ]; then
+            echo "Directory $NOTEBOOK_DIR does not exist. Setting empty matrix."
+            echo "matrix=[]" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Find notebooks and handle empty results
+          if [ -z "$(find "$NOTEBOOK_DIR" -maxdepth 1 -name "*.py" 2>/dev/null)" ]; then
+            echo "No notebooks found in $NOTEBOOK_DIR. Setting empty matrix."
+            echo "matrix=[]" >> "$GITHUB_OUTPUT"
+          else
+            notebooks=$(find "$NOTEBOOK_DIR" -maxdepth 1 -name "*.py" -print0 | xargs -0 -n1 echo | jq -R -s -c 'split("\n")[:-1]')
+            echo "matrix=$notebooks" >> "$GITHUB_OUTPUT"
+          fi
+        shell: bash
+
+  # Create one job per notebook using the matrix strategy for parallel execution
+  test-notebooks:
+    if: needs.list-notebooks.outputs.notebook-list != '[]'
+    runs-on: ubuntu-latest
+    needs: list-notebooks
+    strategy:
+      matrix:
+        notebook: ${{ fromJson(needs.list-notebooks.outputs.notebook-list) }}
+      # Don't fail the entire workflow if one notebook fails
+      fail-fast: false
+    name: Run notebook ${{ matrix.notebook }}
+    steps:
+      # Check out the repository code
+      - uses: actions/checkout@v6.0.2
+        with:
+          lfs: true
+
+      # Install uv/uvx
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7.2.1
+        with:
+          version: "0.9.28"
+
+      # Execute the notebook in a fresh, self-contained environment
+      - name: Run notebook
+        env:
+          UV_EXTRA_INDEX_URL: ${{ secrets.UV_EXTRA_INDEX_URL }}
+        run: |
+          uvx uv run "${{ matrix.notebook }}"
+          # uvx    β†’ creates a fresh ephemeral environment
+          # uv run β†’ runs the notebook as a script in that ephemeral env
+          # No project packages are pre-installed.
+          # βœ… This forces the notebook to handle its dependencies explicitly
+          #    (e.g. via inline script metadata, or a pip install inside the script).
+          # βœ… It's a true integration smoke test.
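+          # Here, each notebook declares its own dependencies via inline
+          # script metadata (PEP 723), which `uv run` resolves on the fly.
+          # For example, the header of marimo/Ch03-linreg-lab.py in this
+          # repository reads:
+          #   # /// script
+          #   # dependencies = ["marimo==0.19.6", "numpy==2.3.1", ...]
+          #   # requires-python = ">=3.12"
+          #   # ///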
+          # Benefits of this pattern:
+          #   - Confirms the notebook can bootstrap itself in a fresh environment
+          #   - Catches missing dependency declarations or install steps early
+          #   - Ensures CI and other users can run the notebook without manual setup
+        shell: bash
diff --git a/Makefile b/Makefile
index 979800a..3a48ca7 100644
--- a/Makefile
+++ b/Makefile
@@ -24,5 +24,5 @@ jupyter: install ## Install and start jupyter Lab
 
 .PHONY: marimo
 marimo: install ## Install and start marimo
-	@uv run pip install marimo
-	@uv run marimo edit --no-token --headless .
+	@uv run pip install marimo
+	@uv run marimo edit --no-token --headless .
diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
new file mode 100644
index 0000000..4a7c668
--- /dev/null
+++ b/marimo/Ch03-linreg-lab.py
@@ -0,0 +1,899 @@
+# /// script
+# dependencies = [
+#     "marimo==0.19.6",
+#     "numpy==2.3.1",
+#     "pandas",
+#     "matplotlib",
+#     "statsmodels",
+#     "ISLP"
+# ]
+# requires-python = ">=3.12"
+# ///
+
+import marimo
+
+__generated_with = "0.19.6"
+app = marimo.App()
+
+with app.setup:
+    import marimo as mo
+    import numpy as np
+    import pandas as pd
+
+    from matplotlib.pyplot import subplots
+
+    import statsmodels.api as sm
+    from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
+    from statsmodels.stats.anova import anova_lm
+
+    from ISLP import load_data
+    from ISLP.models import (ModelSpec as MS,
+                             summarize,
+                             poly)
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    # Linear Regression
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Importing packages
+    We import our standard libraries at the top
+    level.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### New imports
+    Throughout this lab we will introduce new functions and libraries; however,
+    we import them here to emphasize that these are the new
+    code objects in this lab. Keeping imports near the top
+    of a notebook makes the code more readable, since scanning the first few
+    lines tells us what libraries are used.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We will provide relevant details about the
+    functions below as they are needed.
+
+    Besides importing whole modules, it is also possible
+    to import only a few items from a given module. This
+    will help keep the *namespace* clean.
+    We will use a few specific objects from the `statsmodels` package,
+    which we import here.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    As one of the import statements above is quite long, we have split it
+    across several lines (within parentheses) to ease readability.
+
+    We will also use some functions written for the labs in this book in the `ISLP`
+    package.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Inspecting Objects and Namespaces
+    The function `dir()` provides a list of
+    objects in a namespace.
+    """)
+    return
+
+
+@app.cell
+def _():
+    dir()
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    This shows you everything that `Python` can find at the top level.
+    There are certain objects like `__builtins__` that contain references
+    to built-in functions like `print()`.
+
+    Every `Python` object has its own notion of
+    namespace, also accessible with `dir()`. This will include
+    both the attributes of the object
+    as well as any methods associated with it. For instance, we
+    see `'sum'` in the listing for an array.
+    """)
+    return
+
+
+@app.cell
+def _():
+    A = np.array([3, 5, 11])
+    dir(A)
+    return (A,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    This indicates that the object `A.sum` exists. In this case it is a method
+    that can be used to compute the sum of the array `A`, as can be seen by typing `A.sum?`.
+    """)
+    return
+
+
+@app.cell
+def _(A):
+    A.sum()
+    return
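+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The trailing `?` is IPython/Jupyter shorthand. In a plain `Python`
+    session, or in a marimo cell, the built-in `help()` function retrieves
+    the same documentation:
+    """)
+    return
+
+
+@app.cell
+def _(A):
+    # standard-library equivalent of the notebook shorthand A.sum?
+    help(A.sum)
+    return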
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Simple Linear Regression
+    In this section we will construct model
+    matrices (also called design matrices) using the `ModelSpec()` transform from `ISLP.models`.
+
+    We will use the `Boston` housing data set, which is contained in the `ISLP` package. The `Boston` dataset records `medv` (median house value) for $506$ neighborhoods
+    around Boston. We will build a regression model to predict `medv` using $12$
+    predictors such as `rm` (average number of rooms per house),
+    `age` (proportion of owner-occupied units built prior to 1940), and `lstat` (percent of
+    households with low socioeconomic status). For this task we will use `statsmodels`,
+    a `Python` package that implements several commonly used
+    regression methods.
+
+    We have included a simple loading function `load_data()` in the
+    `ISLP` package:
+    """)
+    return
+
+
+@app.cell
+def _():
+    Boston = load_data("Boston")
+    Boston.columns
+    return (Boston,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Type `Boston?` to find out more about these data.
+
+    We start by using the `sm.OLS()` function to fit a
+    simple linear regression model. Our response will be
+    `medv`, and `lstat` will be the single predictor.
+    For this model, we can create the model matrix by hand.
+    """)
+    return
+
+
+@app.cell
+def _(Boston):
+    X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]),
+                      'lstat': Boston['lstat']})
+    X[:4]
+    return (X,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We extract the response and fit the model.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, X):
+    y = Boston['medv']
+    _model = sm.OLS(y, X)
+    results = _model.fit()
+    return results, y
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Note that `sm.OLS()` does
+    not fit the model; it specifies the model, and then `model.fit()` does the actual fitting.
+
+    Our `ISLP` function `summarize()` produces a simple table of the parameter estimates,
+    their standard errors, $t$-statistics and $p$-values.
+    The function takes a single argument, such as the object `results`
+    returned here by the `fit`
+    method, and returns such a summary.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    summarize(results)
+    return
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Before we describe other methods for working with fitted models, we outline a more useful and general framework for constructing a model matrix `X`.
+    ### Using Transformations: Fit and Transform
+    Our model above has a single predictor, and constructing `X` was straightforward.
+    In practice we often fit models with more than one predictor, typically selected from an array or data frame.
+    We may wish to introduce transformations to the variables before fitting the model, specify interactions between variables, and expand some particular variables into sets of variables (e.g. polynomials).
+    The `sklearn` package has a particular notion
+    for this type of task: a *transform*. A transform is an object
+    that is created with some parameters as arguments. The
+    object has two main methods: `fit()` and `transform()`.
+
+    We provide a general approach for specifying models and constructing
+    the model matrix through the transform `ModelSpec()` in the `ISLP` library.
+    `ModelSpec()`
+    (renamed `MS()` in the preamble) creates a
+    transform object, and then a pair of methods
+    `transform()` and `fit()` are used to construct a
+    corresponding model matrix.
+
+    We first describe this process for our simple regression model using a single predictor `lstat` in
+    the `Boston` data frame, but we will use it repeatedly in more
+    complex tasks in this and other labs in this book.
+    In our case the transform is created by the expression
+    `design = MS(['lstat'])`.
+
+    The `fit()` method takes the original array and may do some
+    initial computations on it, as specified in the transform object.
+    For example, it may compute means and standard deviations for centering and scaling.
+    The `transform()`
+    method applies the fitted transformation to the array of data, and produces the model matrix.
+    """)
+    return
+
+
+@app.cell
+def _(Boston):
+    design = MS(['lstat'])
+    design = design.fit(Boston)
+    X_1 = design.transform(Boston)
+    X_1[:4]
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    In this simple case, the `fit()` method does very little; it simply checks that the variable `'lstat'` specified in `design` exists in `Boston`. Then `transform()` constructs the model matrix with two columns: an `intercept` and the variable `lstat`.
+
+    These two operations can be combined with the
+    `fit_transform()` method.
+    """)
+    return
+
+
+@app.cell
+def _(Boston):
+    design_1 = MS(['lstat'])
+    X_2 = design_1.fit_transform(Boston)
+    X_2[:4]
+    return X_2, design_1
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Note that, as in the previous code chunk where the two steps were done separately, the `design` object is changed as a result of the `fit()` operation. The power of this pipeline will become clearer when we fit more complex models that involve interactions and transformations.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Let's return to our fitted regression model.
+    The object
+    `results` has several methods that can be used for inference.
+    We already presented a function `summarize()` for showing the essentials of the fit.
+    For a full and somewhat exhaustive summary of the fit, we can use the `summary()`
+    method.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    results.summary()
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The fitted coefficients can also be retrieved as the
+    `params` attribute of `results`.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    results.params
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The `get_prediction()` method can be used to obtain predictions, and to produce confidence intervals and
+    prediction intervals for the prediction of `medv` for given values of `lstat`.
+
+    We first create a new data frame, in this case containing only the variable `lstat`, with the values of this variable at which we wish to make predictions.
+    We then use the `transform()` method of `design` to create the corresponding model matrix.
+    """)
+    return
+
+
+@app.cell
+def _(design_1):
+    new_df = pd.DataFrame({'lstat': [5, 10, 15]})
+    newX = design_1.transform(new_df)
+    newX
+    return (newX,)
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Next we compute the predictions at `newX`, and view them by extracting the `predicted_mean` attribute.
+    """)
+    return
+
+
+@app.cell
+def _(newX, results):
+    new_predictions = results.get_prediction(newX)
+    new_predictions.predicted_mean
+    return (new_predictions,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We can produce confidence intervals for the predicted values.
+    """)
+    return
+
+
+@app.cell
+def _(new_predictions):
+    new_predictions.conf_int(alpha=0.05)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Prediction intervals are computed by setting `obs=True`:
+    """)
+    return
+
+
+@app.cell
+def _(new_predictions):
+    new_predictions.conf_int(obs=True, alpha=0.05)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    For instance, the 95% confidence interval associated with an
+    `lstat` value of 10 is (24.47, 25.63), and the 95% prediction
+    interval is (12.82, 37.28). As expected, the confidence and
+    prediction intervals are centered around the same point (a predicted
+    value of 25.05 for `medv` when `lstat` equals
+    10), but the latter are substantially wider.
+
+    Next we will plot `medv` and `lstat`
+    using `DataFrame.plot.scatter()`,
+    and wish to
+    add the regression line to the resulting plot.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Defining Functions
+    While there is a function
+    within the `ISLP` package that adds a line to an existing plot, we take this opportunity
+    to define our first function to do so.
+    """)
+    return
+
+
+@app.function
+def abline(ax, b, m):
+    """Add a line with slope m and intercept b to ax"""
+    xlim = ax.get_xlim()
+    ylim = [m * xlim[0] + b, m * xlim[1] + b]
+    ax.plot(xlim, ylim)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    A few things are illustrated above. First, we see the syntax for defining a function:
+    `def funcname(...)`. The function has arguments `ax, b, m`,
+    where `ax` is an axis object for an existing plot, `b` is the intercept and
+    `m` is the slope of the desired line. Other plotting options can be passed on to
+    `ax.plot` by including additional optional arguments as follows:
+    """)
+    return
+
+
+@app.function
+def abline_1(ax, b, m, *args, **kwargs):
+    """Add a line with slope m and intercept b to ax"""
+    xlim = ax.get_xlim()
+    ylim = [m * xlim[0] + b, m * xlim[1] + b]
+    ax.plot(xlim, ylim, *args, **kwargs)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The addition of `*args` allows any number of
+    non-named arguments to `abline_1`, while `**kwargs` allows any
+    number of named arguments (such as `linewidth=3`).
+    In our function, we pass
+    these arguments verbatim to `ax.plot` above. Readers
+    interested in learning more about
+    functions are referred to the section on
+    defining functions in [docs.python.org/tutorial](https://docs.python.org/3/tutorial/controlflow.html#defining-functions).
+
+    Let's use our new function to add this regression line to a plot of
+    `medv` vs. `lstat`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, results):
+    _ax = Boston.plot.scatter('lstat', 'medv')
+    abline_1(_ax, results.params['intercept'], results.params['lstat'], 'r--', linewidth=3)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Thus, the final call to `ax.plot()` is `ax.plot(xlim, ylim, 'r--', linewidth=3)`.
+    We have used the argument `'r--'` to produce a red dashed line, and added
+    an argument to make it of width 3.
+    There is some evidence for non-linearity in the relationship between `lstat` and `medv`. We will explore this issue later in this lab.
+
+    As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays.
+    """)
+    return
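+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    For comparison, a quick illustration: `ax.axline()` takes a point and a
+    slope, so the same line can be drawn directly.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, results):
+    _ax = Boston.plot.scatter('lstat', 'medv')
+    # draw the fitted line through its value at lstat=0 (the intercept),
+    # with the fitted slope
+    _ax.axline((0, results.params['intercept']),
+               slope=results.params['lstat'],
+               c='r', ls='--', linewidth=3)
+    return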
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Next we examine some diagnostic plots, several of which were discussed
+    earlier in this chapter.
+    We can find the fitted values and residuals
+    of the fit as attributes of the `results` object.
+    Various influence measures describing the regression model
+    are computed with the `get_influence()` method.
+    As we will not use the `fig` component returned
+    as the first value from `subplots()`, we simply
+    capture the second returned value in `ax` below.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(results.fittedvalues, results.resid)
+    _ax.set_xlabel('Fitted value')
+    _ax.set_ylabel('Residual')
+    _ax.axhline(0, c='k', ls='--')
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We add a horizontal line at 0 for reference using the
+    `ax.axhline()` method, indicating
+    it should be black (`c='k'`) and have a dashed linestyle (`ls='--'`).
+
+    On the basis of the residual plot, there is some evidence of non-linearity.
+    Leverage statistics can be computed for any number of predictors using the
+    `hat_matrix_diag` attribute of the value returned by the
+    `get_influence()` method.
+    """)
+    return
+
+
+@app.cell
+def _(X_2, results):
+    infl = results.get_influence()
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(np.arange(X_2.shape[0]), infl.hat_matrix_diag)
+    _ax.set_xlabel('Index')
+    _ax.set_ylabel('Leverage')
+    np.argmax(infl.hat_matrix_diag)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The `np.argmax()` function identifies the index of the largest element of an array, optionally computed over an axis of the array.
+    In this case, we maximized over the entire array
+    to determine which observation has the largest leverage statistic.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Multiple Linear Regression
+    In order to fit a multiple linear regression model using least squares, we again use
+    the `ModelSpec()` transform to construct the required
+    model matrix and response. The arguments
+    to `ModelSpec()` can be quite general, but in this case
+    a list of column names suffices. We consider a fit here with
+    the two variables `lstat` and `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, y):
+    X_3 = MS(['lstat', 'age']).fit_transform(Boston)
+    _model1 = sm.OLS(y, X_3)
+    results1 = _model1.fit()
+    summarize(results1)
+    return (results1,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Notice how we have compacted the first line into a succinct expression describing the construction of `X`.
+
+    The `Boston` data set contains 12 predictors in addition to the response `medv`, and so it would be cumbersome
+    to have to type all of these in order to perform a regression using all of the predictors.
+    Instead, we can use the following short-hand:
+    """)
+    return
+
+
+@app.cell
+def _(Boston):
+    terms = Boston.columns.drop('medv')
+    terms
+    return (terms,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We can now fit the model with all the variables in `terms` using
+    the same model matrix builder.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, terms, y):
+    X_4 = MS(terms).fit_transform(Boston)
+    _model = sm.OLS(y, X_4)
+    results_1 = _model.fit()
+    summarize(results_1)
+    return (X_4,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    What if we would like to perform a regression using all of the variables but one? For
+    example, in the above regression output, `age` has a high $p$-value.
+    So we may wish to run a regression excluding this predictor.
+    The following syntax results in a regression using all predictors except `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, y):
+    minus_age = Boston.columns.drop(['medv', 'age'])
+    Xma = MS(minus_age).fit_transform(Boston)
+    _model1 = sm.OLS(y, Xma)
+    summarize(_model1.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Multivariate Goodness of Fit
+    We can access the individual components of `results` by name
+    (`dir(results)` shows us what is available). Hence
+    `results.rsquared` gives us the $R^2$,
+    and
+    `np.sqrt(results.scale)` gives us the RSE, as computed below.
+    """)
+    return
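+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    For example, as a quick illustration using our simple regression
+    fit `results`:
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    # R^2 and residual standard error (RSE) of the simple fit
+    (results.rsquared, np.sqrt(results.scale))
+    return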
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Variance inflation factors (discussed earlier in this chapter) are sometimes useful
+    to assess the effect of collinearity in the model matrix of a regression model.
+    We will compute the VIFs in our multiple regression fit, and use the opportunity to introduce the idea of *list comprehension*.
+
+    ### List Comprehension
+    Often we encounter a sequence of objects which we would like to transform
+    for some other task. Below, we compute the VIF for each
+    feature in our `X` matrix and produce a data frame
+    whose index agrees with the columns of `X`.
+    The notion of list comprehension can often make such
+    a task easier.
+
+    List comprehensions are simple and powerful ways to form
+    lists of `Python` objects. The language also supports
+    dictionary and *generator* comprehensions, though these are
+    beyond our scope here. Let's look at an example. We compute the VIF for each of the variables
+    in the model matrix `X`, using the function `variance_inflation_factor()`.
+    """)
+    return
+
+
+@app.cell
+def _(X_4):
+    _vals = [VIF(X_4, i) for i in range(1, X_4.shape[1])]
+    vif = pd.DataFrame({'vif': _vals}, index=X_4.columns[1:])
+    vif
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The function `VIF()` takes two arguments: a dataframe or array,
+    and a variable column index. In the code above we call `VIF()` on the fly for all columns in `X`.
+    We have excluded column 0 above (the intercept), which is not of interest. In this case the VIFs are not that exciting.
+
+    The object `_vals` above could have been constructed with the following for loop:
+    """)
+    return
+
+
+@app.cell
+def _(X_4):
+    _vals = []
+    for i in range(1, X_4.values.shape[1]):
+        _vals.append(VIF(X_4.values, i))
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    List comprehension allows us to perform such repetitive operations in a more straightforward way.
+    ## Interaction Terms
+    It is easy to include interaction terms in a linear model using `ModelSpec()`.
+    Including a tuple `("lstat","age")` tells the model
+    matrix builder to include an interaction term between
+    `lstat` and `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, y):
+    X_5 = MS(['lstat', 'age', ('lstat', 'age')]).fit_transform(Boston)
+    model2 = sm.OLS(y, X_5)
+    summarize(model2.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Non-linear Transformations of the Predictors
+    The model matrix builder can include terms beyond
+    just column names and interactions. For instance,
+    the `poly()` function supplied in `ISLP` specifies that
+    columns representing polynomial functions
+    of its first argument are added to the model matrix.
+    """)
+    return
+@app.cell
+def _(Boston, y):
+    X_6 = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)
+    model3 = sm.OLS(y, X_6)
+    results3 = model3.fit()
+    summarize(results3)
+    return (results3,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The effectively zero *p*-value associated with the quadratic term
+    (i.e. the third row above) suggests that it leads to an improved model.
+
+    By default, `poly()` creates a basis matrix for inclusion in the
+    model matrix whose
+    columns are *orthogonal polynomials*, which are designed for stable
+    least squares computations. (Actually, `poly()` is a wrapper for the workhorse and standalone function `Poly()` that does the work in building the model matrix.)
+    Alternatively, had we included an argument
+    `raw=True` in the above call to `poly()`, the basis matrix would have consisted simply of
+    `lstat` and `lstat**2`. Since both of these bases
+    represent quadratic polynomials, the fitted values would not
+    change in this case, just the polynomial coefficients. Also by default, the columns
+    created by `poly()` do not include an intercept column, as
+    that is automatically added by `MS()`.
+
+    We use the `anova_lm()` function to further quantify the extent to which the quadratic fit is
+    superior to the linear fit.
+    """)
+    return
+
+
+@app.cell
+def _(results1, results3):
+    anova_lm(results1, results3)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    Here `results1` represents the linear submodel containing
+    predictors `lstat` and `age`,
+    while `results3` corresponds to the larger model above with a quadratic
+    term in `lstat`.
+    The `anova_lm()` function performs a hypothesis test
+    comparing the two models. The null hypothesis is that the quadratic
+    term in the bigger model is not needed, and the alternative hypothesis is that the
+    bigger model is superior. Here the *F*-statistic is 177.28 and
+    the associated *p*-value is zero.
+    In this case the *F*-statistic is the square of the
+    *t*-statistic for the quadratic term in the linear model summary
+    for `results3` --- a consequence of the fact that these nested
+    models differ by one degree of freedom.
+    This provides very clear evidence that the quadratic polynomial in
+    `lstat` improves the linear model.
+    This is not surprising, since earlier we saw evidence for non-linearity in the relationship between `medv`
+    and `lstat`.
+
+    The function `anova_lm()` can take more than two nested models
+    as input, in which case it compares every successive pair of models.
+    That also explains why there are `NaN`s in the first row above, since
+    there is no previous model with which to compare the first.
+    """)
+    return
+
+
+@app.cell
+def _(results3):
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(results3.fittedvalues, results3.resid)
+    _ax.set_xlabel('Fitted value')
+    _ax.set_ylabel('Residual')
+    _ax.axhline(0, c='k', ls='--')
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    We see that when the quadratic term is included in the model,
+    there is little discernible pattern in the residuals.
+    In order to create a cubic or higher-degree polynomial fit, we can simply change the degree argument
+    to `poly()`, as sketched below.
+    """)
+    return
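+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    For instance, a cubic fit (a brief illustrative sketch) can be compared
+    with the linear and quadratic fits in a single `anova_lm()` call:
+    """)
+    return
+
+
+@app.cell
+def _(Boston, results1, results3, y):
+    # fit a degree-3 polynomial in lstat and compare all three nested models
+    X_cubic = MS([poly('lstat', degree=3), 'age']).fit_transform(Boston)
+    results_cubic = sm.OLS(y, X_cubic).fit()
+    anova_lm(results1, results3, results_cubic)
+    return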
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Qualitative Predictors
+    Here we use the `Carseats` data, which is included in the
+    `ISLP` package. We will attempt to predict `Sales`
+    (child car seat sales) in 400 locations based on a number of
+    predictors.
+    """)
+    return
+
+
+@app.cell
+def _():
+    Carseats = load_data('Carseats')
+    Carseats.columns
+    return (Carseats,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    The `Carseats`
+    data includes qualitative predictors such as
+    `ShelveLoc`, an indicator of the quality of the shelving
+    location --- that is,
+    the space within a store in which the car seat is displayed. The predictor
+    `ShelveLoc` takes on three possible values, `Bad`, `Medium`, and `Good`.
+    Given a qualitative variable such as `ShelveLoc`, `ModelSpec()` generates dummy
+    variables automatically.
+    These variables are often referred to as a *one-hot encoding* of the categorical
+    feature. Their columns sum to one, so to avoid collinearity with an intercept, the first column is dropped. Below we see that
+    the column `ShelveLoc[Bad]` has been dropped, since `Bad` is the first level of `ShelveLoc`.
+    Below we fit a multiple regression model that includes some interaction terms.
+    """)
+    return
+
+
+@app.cell
+def _(Carseats):
+    allvars = list(Carseats.columns.drop('Sales'))
+    y_1 = Carseats['Sales']
+    final = allvars + [('Income', 'Advertising'), ('Price', 'Age')]
+    X_7 = MS(final).fit_transform(Carseats)
+    _model = sm.OLS(y_1, X_7)
+    summarize(_model.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    In the first line above, we made `allvars` a list, so that we
+    could add the interaction terms two lines down.
+    Our model-matrix builder has created a `ShelveLoc[Good]`
+    dummy variable that takes on a value of 1 if the
+    shelving location is good, and 0 otherwise. It has also created a `ShelveLoc[Medium]`
+    dummy variable that equals 1 if the shelving location is medium, and 0 otherwise.
+    A bad shelving location corresponds to a zero for each of the two dummy variables.
+    The fact that the coefficient for `ShelveLoc[Good]` in the regression output is
+    positive indicates that a good shelving location is associated with high sales (relative to a bad location).
+    `ShelveLoc[Medium]` has a smaller positive coefficient,
+    indicating that a medium shelving location is associated with higher sales than a bad
+    shelving location, but lower sales than a good shelving location.
+    """)
+    return
+
+
+if __name__ == "__main__":
+    app.run()