From 683f99e06d706e33d99196a1099531f820cda5e9 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Tue, 3 Feb 2026 21:52:06 +0400
Subject: [PATCH 01/13] Create .gitkeep in marimo folder

---
 marimo/.gitkeep | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 marimo/.gitkeep

diff --git a/marimo/.gitkeep b/marimo/.gitkeep
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/marimo/.gitkeep
@@ -0,0 +1 @@
+

From 5fc9de1fc8e2435e134ec45c6a1891e19e91989b Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Tue, 3 Feb 2026 22:43:34 +0400
Subject: [PATCH 02/13] a first notebook for marimo

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 979800a0..3a48ca76 100644
--- a/Makefile
+++ b/Makefile
@@ -24,5 +24,5 @@ jupyter: install  ## Install and start jupyter Lab
 
 .PHONY: marimo
 marimo: install ## Install and start marimo
-    @uv run pip install marimo
-    @uv run marimo edit --no-token --headless .
+	@uv run pip install marimo
+	@uv run marimo edit --no-token --headless .

From 28d3317abac533e214deb52ccb990381e85cf375 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Tue, 3 Feb 2026 22:44:03 +0400
Subject: [PATCH 03/13] a first notebook

---
 marimo/Ch03-linreg-lab.py | 909 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 909 insertions(+)
 create mode 100644 marimo/Ch03-linreg-lab.py

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
new file mode 100644
index 00000000..2d7baa4e
--- /dev/null
+++ b/marimo/Ch03-linreg-lab.py
@@ -0,0 +1,909 @@
+import marimo
+
+__generated_with = "0.19.7"
+app = marimo.App()
+
+
+@app.cell
+def _():
+    import marimo as mo
+    return (mo,)
+
+
+@app.cell
+def _(mo):
+    mo.md(r"""
+    # Linear Regression
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Importing packages
+    We import our standard libraries at this top
+    level.
+    """)
+    return
+
+
+@app.cell
+def _():
+    import numpy as np
+    import pandas as pd
+    from matplotlib.pyplot import subplots
+    return np, pd, subplots
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### New imports
+    Throughout this lab we will introduce new functions and libraries. However,
+    we will import them here to emphasize these are the new
+    code objects in this lab. Keeping imports near the top
+    of a notebook makes the code more readable, since scanning the first few
+    lines tells us what libraries are used.
+    """)
+    return
+
+
+@app.cell
+def _():
+    import statsmodels.api as sm
+    return (sm,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We will provide relevant details about the
+    functions below as they are needed.
+
+    Besides importing whole modules, it is also possible
+    to import only a few items from a given module. This
+    will help keep the  *namespace* clean.
+    We will use a few specific objects from the `statsmodels` package
+    which we import here.
+    """)
+    return
+
+
+@app.cell
+def _():
+    from statsmodels.stats.outliers_influence \
+         import variance_inflation_factor as VIF
+    from statsmodels.stats.anova import anova_lm
+    return VIF, anova_lm
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    As one of the import statements above is quite a long line, we inserted a line break `\` to
+    ease readability.
+
+    We will also use some functions written for the labs in this book in the `ISLP`
+    package.
+    """)
+    return
+
+
+@app.cell
+def _():
+    from ISLP import load_data
+    from ISLP.models import (ModelSpec as MS,
+                             summarize,
+                             poly)
+    return MS, load_data, poly, summarize
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Inspecting Objects and Namespaces
+    The
+    function  `dir()`
+    provides a list of
+    objects in a namespace.
+    """)
+    return
+
+
+@app.cell
+def _():
+    dir()
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    This shows you everything that `Python` can find at the top level.
+    There are certain objects like `__builtins__` that contain references to built-in
+    functions like `print()`.
+
+    Every python object has its own notion of
+    namespace, also accessible with `dir()`. This will include
+    both the attributes of the object
+    as well as any methods associated with it. For instance, we see `'sum'` in the listing for an
+    array.
+    """)
+    return
+
+
+@app.cell
+def _(np):
+    A = np.array([3,5,11])
+    dir(A)
+    return (A,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    This indicates that the object `A.sum` exists. In this case it is a method
+    that can be used to compute the sum of the array `A` as can be seen by typing `A.sum?`.
+    """)
+    return
+
+
+@app.cell
+def _(A):
+    A.sum()
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+ 
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Simple Linear Regression
+    In this section we will  construct model
+    matrices (also called design matrices) using the `ModelSpec()`  transform from `ISLP.models`.
+
+    We  will use the `Boston` housing data set, which is contained in the `ISLP` package.  The `Boston` dataset records  `medv`  (median house value) for $506$ neighborhoods
+    around Boston.  We will build a regression model to predict  `medv`  using $13$
+    predictors such as  `rm`  (average number of rooms per house),
+     `age`  (proportion of owner-occupied units built prior to 1940), and  `lstat`  (percent of
+    households with low socioeconomic status).  We will use `statsmodels` for this
+    task, a `Python` package that implements several commonly used
+    regression methods.
+
+    We have included a simple loading function `load_data()` in the
+    `ISLP` package:
+    """)
+    return
+
+
+@app.cell
+def _(load_data):
+    Boston = load_data("Boston")
+    Boston.columns
+    return (Boston,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Type `Boston?` to find out more about these data.
+
+    We start by using the `sm.OLS()`  function to fit a
+    simple linear regression model.  Our response will be
+     `medv`  and  `lstat`  will be the single predictor.
+    For this model, we can create the model matrix by hand.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, np, pd):
+    X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]),
+                      'lstat': Boston['lstat']})
+    X[:4]
+    return (X,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We extract the response, and fit the model.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, X, sm):
+    y = Boston['medv']
+    _model = sm.OLS(y, X)
+    results = _model.fit()
+    return results, y
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Note that `sm.OLS()` does
+    not fit the model; it specifies the model, and then `model.fit()` does the actual fitting.
+
+    Our `ISLP` function `summarize()` produces a simple table of the parameter estimates,
+    their standard errors, t-statistics and p-values.
+    The function takes a single argument, such as the object `results`
+    returned here by the `fit`
+    method, and returns such a summary.
+    """)
+    return
+
+
+@app.cell
+def _(results, summarize):
+    summarize(results)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Before we describe other methods for working with fitted models, we outline a more useful and general framework for constructing a model matrix `X`.
+    ### Using Transformations: Fit and Transform
+    Our model above has a single predictor, and constructing `X` was straightforward.
+    In practice  we often fit models with more than one predictor, typically selected from an array or data frame.
+    We may wish to introduce transformations to the variables before fitting the model, specify interactions between variables, and expand some particular variables into sets of variables (e.g. polynomials).
+    The `sklearn`  package has a particular notion
+    for this type of task: a *transform*. A transform is an object
+    that is created with some parameters as arguments. The
+    object has two main methods: `fit()` and `transform()`.
+
+    We provide a general approach for specifying models and constructing
+    the model matrix through the transform `ModelSpec()` in the `ISLP` library.
+    `ModelSpec()`
+    (renamed `MS()` in the preamble) creates a
+    transform object, and then a pair of methods
+    `transform()` and `fit()` are used to construct a
+    corresponding model matrix.
+
+    We first describe this process for our simple regression model  using a single predictor `lstat` in
+    the `Boston` data frame, but will use it repeatedly in more
+    complex tasks in this and other labs in this book.
+    In our case the transform is created by the expression
+    `design = MS(['lstat'])`.
+
+    The `fit()`  method takes the original array and may do some
+    initial computations on it, as specified in the transform object.
+    For example, it may compute means and standard deviations for centering and scaling.
+    The `transform()`
+    method applies the fitted transformation to the array of data, and produces the model matrix.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS):
+    design = MS(['lstat'])
+    design = design.fit(Boston)
+    X_1 = design.transform(Boston)
+    X_1[:4]
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    In this simple case, the `fit()`  method does very little; it simply checks that the variable `'lstat'` specified in `design` exists in `Boston`. Then `transform()` constructs the model matrix with two columns: an `intercept` and the variable `lstat`.
+
+    These two operations can be  combined with the
+    `fit_transform()`  method.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS):
+    design_1 = MS(['lstat'])
+    X_2 = design_1.fit_transform(Boston)
+    X_2[:4]
+    return X_2, design_1
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Note that, as in the previous code chunk when the two steps were done separately, the `design` object is changed as a result of the `fit()` operation. The power of this pipeline will become clearer when we fit more complex models that involve interactions and transformations.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Let's return to our fitted regression model.
+    The object
+    `results` has several methods that can be used for inference.
+    We already presented a function `summarize()` for showing the essentials of the fit.
+    For a full and somewhat exhaustive summary of the fit, we can use the `summary()`
+    method.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    results.summary()
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The fitted coefficients can also be retrieved as the
+    `params` attribute of `results`.
+    """)
+    return
+
+
+@app.cell
+def _(results):
+    results.params
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The `get_prediction()`  method can be used to obtain predictions, and produce confidence intervals and
+    prediction intervals for the prediction of  `medv`  for  given values of  `lstat`.
+
+    We first create a new data frame, in this case containing only the variable `lstat`, with the values for this variable at which we wish to make predictions.
+    We then use the `transform()` method of `design` to create the corresponding model matrix.
+    """)
+    return
+
+
+@app.cell
+def _(design_1, pd):
+    new_df = pd.DataFrame({'lstat': [5, 10, 15]})
+    newX = design_1.transform(new_df)
+    newX
+    return (newX,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Next we compute the predictions at `newX`, and view them by extracting the `predicted_mean` attribute.
+    """)
+    return
+
+
+@app.cell
+def _(newX, results):
+    new_predictions = results.get_prediction(newX)
+    new_predictions.predicted_mean
+    return (new_predictions,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We can produce confidence intervals for the predicted values.
+    """)
+    return
+
+
+@app.cell
+def _(new_predictions):
+    new_predictions.conf_int(alpha=0.05)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Prediction intervals are computed by setting `obs=True`:
+    """)
+    return
+
+
+@app.cell
+def _(new_predictions):
+    new_predictions.conf_int(obs=True, alpha=0.05)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    For instance, the 95% confidence interval associated with an
+     `lstat`  value of 10 is (24.47, 25.63), and the 95% prediction
+    interval is (12.82, 37.28).  As expected, the confidence and
+    prediction intervals are centered around the same point (a predicted
+    value of 25.05 for  `medv`  when  `lstat`  equals
+    10), but the latter are substantially wider.
+
+    Next we will plot  `medv`  and  `lstat`
+    using `DataFrame.plot.scatter()`,
+    and wish to
+    add the regression line to the resulting plot.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ### Defining Functions
+    While there is a function
+    within the `ISLP` package that adds a line to an existing plot, we take this opportunity
+    to define our first function to do so.
+    """)
+    return
+
+
+@app.function
+def abline(ax, b, m):
+    """Add a line with slope m and intercept b to the axis object ax, spanning its current x-limits."""
+    xlim = ax.get_xlim()
+    ylim = [m * xlim[0] + b, m * xlim[1] + b]
+    ax.plot(xlim, ylim)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    A few things are illustrated above. First we see the syntax for defining a function:
+    `def funcname(...)`. The function has arguments `ax, b, m`
+    where `ax` is an axis object for an existing plot, `b` is the intercept and
+    `m` is the slope of the desired line. Other plotting  options can be passed on to
+    `ax.plot` by including additional optional arguments as follows:
+    """)
+    return
+
+
+@app.function
+def abline_1(ax, b, m, *args, **kwargs):
+    """Add a line with slope m and intercept b to ax; extra *args/**kwargs are passed verbatim to ax.plot."""
+    xlim = ax.get_xlim()
+    ylim = [m * xlim[0] + b, m * xlim[1] + b]
+    ax.plot(xlim, ylim, *args, **kwargs)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The addition of `*args` allows any number of
+    non-named arguments to `abline`, while `**kwargs` allows any
+    number of named arguments (such as `linewidth=3`) to `abline`.
+    In our function, we pass
+    these arguments verbatim to `ax.plot` above. Readers
+    interested in learning more about
+    functions are referred to the section on
+    defining functions in [docs.python.org/tutorial](https://docs.python.org/3/tutorial/controlflow.html#defining-functions).
+
+    Let’s use our new function to add this regression line to a plot of
+    `medv` vs. `lstat`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, results):
+    _ax = Boston.plot.scatter('lstat', 'medv')
+    abline_1(_ax, results.params['intercept'], results.params['lstat'], 'r--', linewidth=3)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Thus, the final call to `ax.plot()` is `ax.plot(xlim, ylim, 'r--', linewidth=3)`.
+    We have used the argument `'r--'` to produce a red dashed line, and added
+    an argument to make it of width 3.
+    There is some evidence for non-linearity in the relationship between  `lstat`  and  `medv`. We will explore this issue later in this lab.
+
+    As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Next we examine some diagnostic plots, several of which were discussed
+    in the section of Chapter 3 on potential problems.
+    We can find the fitted values and residuals
+    of the fit as attributes of the `results` object.
+    Various influence measures describing the regression model
+    are computed with the `get_influence()` method.
+    As we will not use the `fig` component returned
+    as the first value from `subplots()`, we simply
+    capture the second returned value in `ax` below.
+    """)
+    return
+
+
+@app.cell
+def _(results, subplots):
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(results.fittedvalues, results.resid)
+    _ax.set_xlabel('Fitted value')
+    _ax.set_ylabel('Residual')
+    _ax.axhline(0, c='k', ls='--')
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We add a horizontal line at 0 for reference using the
+     `ax.axhline()`   method, indicating
+    it should be black (`c='k'`) and have a dashed linestyle (`ls='--'`).
+
+    On the basis of the residual plot, there is some evidence of non-linearity.
+    Leverage statistics can be computed for any number of predictors using the
+    `hat_matrix_diag` attribute of the value returned by the
+    `get_influence()`  method.
+    """)
+    return
+
+
+@app.cell
+def _(X_2, np, results, subplots):
+    infl = results.get_influence()
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(np.arange(X_2.shape[0]), infl.hat_matrix_diag)
+    _ax.set_xlabel('Index')
+    _ax.set_ylabel('Leverage')
+    np.argmax(infl.hat_matrix_diag)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The `np.argmax()`  function identifies the index of the largest element of an array, optionally computed over an axis of the array.
+    In this case, we maximized over the entire array
+    to determine which observation has the largest leverage statistic.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Multiple Linear Regression
+    In order to fit a multiple linear regression model using least squares, we again use
+    the `ModelSpec()`  transform to construct the required
+    model matrix and response. The arguments
+    to `ModelSpec()` can be quite general, but in this case
+    a list of column names suffice. We consider a fit here with
+    the two variables `lstat` and `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS, sm, summarize, y):
+    X_3 = MS(['lstat', 'age']).fit_transform(Boston)
+    _model1 = sm.OLS(y, X_3)
+    results1 = _model1.fit()
+    summarize(results1)
+    return (results1,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Notice how we have compacted the first line into a succinct expression describing the construction of `X`.
+
+    The  `Boston`   data set contains 12 variables, and so it would be cumbersome
+    to have to type all of these in order to perform a regression using all of the predictors.
+    Instead, we can use the following short-hand:
+    """)
+    return
+
+
+@app.cell
+def _(Boston):
+    terms = Boston.columns.drop('medv')
+    terms
+    return (terms,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We can now fit the model with all the variables in `terms` using
+    the same model matrix builder.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS, sm, summarize, terms, y):
+    X_4 = MS(terms).fit_transform(Boston)
+    _model = sm.OLS(y, X_4)
+    results_1 = _model.fit()
+    summarize(results_1)
+    return (X_4,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    What if we would like to perform a regression using all of the variables but one?  For
+    example, in the above regression output,   `age`  has a high $p$-value.
+    So we may wish to run a regression excluding this predictor.
+    The following syntax results in a regression using all predictors except  `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS, sm, summarize, y):
+    minus_age = Boston.columns.drop(['medv', 'age'])
+    Xma = MS(minus_age).fit_transform(Boston)
+    _model1 = sm.OLS(y, Xma)
+    summarize(_model1.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Multivariate Goodness of Fit
+    We can access the individual components of `results` by name
+    (`dir(results)` shows us what is available). Hence
+    `results.rsquared` gives us the $R^2$,
+    and
+    `np.sqrt(results.scale)` gives us the RSE.
+
+    Variance inflation factors (see the section of Chapter 3 on potential problems) are sometimes useful
+    to assess the effect of collinearity in the model matrix of a regression model.
+    We will compute the VIFs in our multiple regression fit, and use the opportunity to introduce the idea of *list comprehension*.
+
+    ### List Comprehension
+    Often we encounter a sequence of objects which we would like to transform
+    for some other task. Below, we compute the VIF for each
+    feature in our `X` matrix and produce a data frame
+    whose index agrees with the columns of `X`.
+    The notion of list comprehension can often make such
+    a task easier.
+
+    List comprehensions are simple and powerful ways to form
+    lists of `Python` objects. The language also supports
+    dictionary and *generator* comprehension, though these are
+    beyond our scope here. Let's look at an example. We compute the VIF for each of the variables
+    in the model matrix `X`, using the function `variance_inflation_factor()`.
+    """)
+    return
+
+
+@app.cell
+def _(VIF, X_4, pd):
+    _vals = [VIF(X_4, i) for i in range(1, X_4.shape[1])]
+    vif = pd.DataFrame({'vif': _vals}, index=X_4.columns[1:])
+    vif
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The function `VIF()` takes two arguments: a dataframe or array,
+    and a variable column index. In the code above we call `VIF()` on the fly for all columns in `X`.
+    We have excluded column 0 above (the intercept), which is not of interest. In this case the VIFs are not that exciting.
+
+    The object `vals` above could have been constructed with the following for loop:
+    """)
+    return
+
+
+@app.cell
+def _(VIF, X_4):
+    _vals = []
+    for i in range(1, X_4.values.shape[1]):
+        _vals.append(VIF(X_4.values, i))
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    List comprehension allows us to perform such repetitive operations in a more straightforward way.
+    ## Interaction Terms
+    It is easy to include interaction terms in a linear model using `ModelSpec()`.
+    Including a tuple `("lstat","age")` tells the model
+    matrix builder to include an interaction term between
+     `lstat`  and  `age`.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS, sm, summarize, y):
+    X_5 = MS(['lstat', 'age', ('lstat', 'age')]).fit_transform(Boston)
+    model2 = sm.OLS(y, X_5)
+    summarize(model2.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Non-linear Transformations of the Predictors
+    The model matrix builder can include terms beyond
+    just column names and interactions. For instance,
+    the `poly()` function supplied in `ISLP` specifies that
+    columns representing polynomial functions
+    of its first argument are added to the model matrix.
+    """)
+    return
+
+
+@app.cell
+def _(Boston, MS, poly, sm, summarize, y):
+    X_6 = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)
+    model3 = sm.OLS(y, X_6)
+    results3 = model3.fit()
+    summarize(results3)
+    return (results3,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The effectively zero *p*-value associated with the quadratic term
+    (i.e. the third row above) suggests that it leads to an improved model.
+
+    By default, `poly()` creates a basis matrix for inclusion in the
+    model matrix whose
+    columns are *orthogonal polynomials*, which are designed for stable
+    least squares computations. (Actually, `poly()` is a wrapper for the workhorse and standalone function `Poly()` that does the work in building the model matrix.)
+    Alternatively, had we included an argument
+    `raw=True` in the above call to `poly()`, the basis matrix would consist simply of
+    `lstat` and `lstat**2`. Since either of these bases
+    represent quadratic polynomials, the fitted values  would not
+    change in this case, just the polynomial coefficients.  Also by default, the columns
+    created by `poly()` do not include an intercept column as
+    that is automatically added by `MS()`.
+
+    We use the `anova_lm()` function to further quantify the extent to which the quadratic fit is
+    superior to the linear fit.
+    """)
+    return
+
+
+@app.cell
+def _(anova_lm, results1, results3):
+    anova_lm(results1, results3)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    Here `results1` represents the linear submodel containing
+    predictors `lstat` and `age`,
+    while `results3` corresponds to the larger model above  with a quadratic
+    term in `lstat`.
+    The `anova_lm()` function performs a hypothesis test
+    comparing the two models. The null hypothesis is that the quadratic
+    term in the bigger model is not needed, and the alternative hypothesis is that the
+    bigger model is superior. Here the *F*-statistic is 177.28 and
+    the associated *p*-value is zero.
+    In this case the *F*-statistic is the square of the
+    *t*-statistic for the quadratic term in the linear model summary
+    for `results3` --- a consequence of the fact that these nested
+    models differ by one degree of freedom.
+    This provides very clear evidence that the quadratic polynomial in
+    `lstat` improves the linear model.
+    This is not surprising, since earlier we saw evidence for non-linearity in the relationship between `medv`
+    and  `lstat`.
+
+    The function `anova_lm()` can take more than two nested models
+    as input, in which case it compares every successive pair of models.
+    That also explains why there are `NaN`s in the first row above, since
+    there is no previous model with which to compare the first.
+    """)
+    return
+
+
+@app.cell
+def _(results3, subplots):
+    _ax = subplots(figsize=(8, 8))[1]
+    _ax.scatter(results3.fittedvalues, results3.resid)
+    _ax.set_xlabel('Fitted value')
+    _ax.set_ylabel('Residual')
+    _ax.axhline(0, c='k', ls='--')
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    We see that when the quadratic term is included in the model,
+    there is little discernible pattern in the residuals.
+    In order to create a cubic or higher-degree polynomial fit, we can simply change the degree argument
+    to `poly()`.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Qualitative Predictors
+    Here we use the  `Carseats`  data, which is included in the
+    `ISLP` package. We will  attempt to predict `Sales`
+    (child car seat sales) in 400 locations based on a number of
+    predictors.
+    """)
+    return
+
+
+@app.cell
+def _(load_data):
+    Carseats = load_data('Carseats')
+    Carseats.columns
+    return (Carseats,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    The `Carseats`
+     data includes qualitative predictors such as
+     `ShelveLoc`, an indicator of the quality of the shelving
+     location --- that is,
+    the  space within a store in which the car seat is displayed. The predictor
+     `ShelveLoc`  takes on three possible values, `Bad`, `Medium`, and `Good`.
+    Given a qualitative variable such as  `ShelveLoc`, `ModelSpec()` generates dummy
+    variables automatically.
+    These variables are often referred to as a *one-hot encoding* of the categorical
+    feature. Their columns sum to one, so to avoid collinearity with an intercept, the first column is dropped. Below we see
+    the column `ShelveLoc[Bad]` has been dropped, since `Bad` is the first level of `ShelveLoc`.
+    Below we fit a multiple regression model that includes some interaction terms.
+    """)
+    return
+
+
+@app.cell
+def _(Carseats, MS, sm, summarize):
+    allvars = list(Carseats.columns.drop('Sales'))
+    y_1 = Carseats['Sales']
+    final = allvars + [('Income', 'Advertising'), ('Price', 'Age')]
+    X_7 = MS(final).fit_transform(Carseats)
+    _model = sm.OLS(y_1, X_7)
+    summarize(_model.fit())
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    In the first line above, we made `allvars` a list, so that we
+    could add the interaction terms two lines down.
+    Our model-matrix builder has created a `ShelveLoc[Good]`
+    dummy variable that takes on a value of 1 if the
+    shelving location is good, and 0 otherwise. It has also created a `ShelveLoc[Medium]`
+    dummy variable that equals 1 if the shelving location is medium, and 0 otherwise.
+    A bad shelving location corresponds to a zero for each of the two dummy variables.
+    The fact that the coefficient for `ShelveLoc[Good]` in the regression output is
+    positive indicates that a good shelving location is associated with high sales (relative to a bad location).
+    And `ShelveLoc[Medium]` has a smaller positive coefficient,
+    indicating that a medium shelving location leads to higher sales than a bad
+    shelving location, but lower sales than a good shelving location.
+    """)
+    return
+
+
+if __name__ == "__main__":
+    app.run()

From 4a67183a1c69f35d7a8909852c572e0ce6f6b27d Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:35:52 +0400
Subject: [PATCH 04/13] .gitkeep no longer needed

---
 marimo/.gitkeep | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 marimo/.gitkeep

diff --git a/marimo/.gitkeep b/marimo/.gitkeep
deleted file mode 100644
index 8b137891..00000000
--- a/marimo/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-

From 799bc2689cb9435cc4b2b8da795fb57001eccd1d Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:37:14 +0400
Subject: [PATCH 05/13] Add GitHub Actions workflow to test Marimo notebooks

---
 .github/workflows/rhiza_marimo.yml | 108 +++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 .github/workflows/rhiza_marimo.yml

diff --git a/.github/workflows/rhiza_marimo.yml b/.github/workflows/rhiza_marimo.yml
new file mode 100644
index 00000000..b4f52c1b
--- /dev/null
+++ b/.github/workflows/rhiza_marimo.yml
@@ -0,0 +1,108 @@
+# This file is part of the jebel-quant/rhiza repository
+# (https://github.com/jebel-quant/rhiza).
+#
+# Workflow: Marimo Notebooks
+#
+# Purpose: This workflow discovers and executes all Marimo notebooks in the
+#          repository. It builds a dynamic matrix to run each notebook in
+#          parallel to surface errors early and keep notebooks reproducible.
+#
+# Trigger: This workflow runs on every push and on pull requests to main/master
+#          branches (including from forks)
+#
+# Components:
+#   - 🔎 Discover notebooks in the folder named by MARIMO_FOLDER (default: marimo)
+#   - 🧪 Run each notebook in parallel using a matrix strategy
+#   - ✅ Fail-fast disabled to report all failing notebooks
+
+name: "(RHIZA) MARIMO"
+
+permissions:
+  contents: read
+
+on:
+  push:
+    branches: [ main, master ]
+  pull_request:
+    branches: [ main, master ]
+
+jobs:
+  # Build a matrix of notebooks to test
+  list-notebooks:
+    runs-on: ubuntu-latest
+    outputs:
+      notebook-list: ${{ steps.notebooks.outputs.matrix }}
+    steps:
+      # Check out the repository code
+      - uses: actions/checkout@v6.0.2
+
+      # Find all Python files in the marimo folder and create a matrix for parallel execution
+      - name: Find notebooks and build matrix
+        id: notebooks
+        run: |
+          # Extract MARIMO_FOLDER from the project configuration (via Makefile)
+          # shellcheck disable=SC2016 # Single quotes intentional - Make syntax, not shell expansion
+          NOTEBOOK_DIR=$(make -s -f Makefile -f - <<< 'print: ; @echo $(or $(MARIMO_FOLDER),marimo)' print)
+
+          echo "Searching notebooks in: $NOTEBOOK_DIR"
+          # Check if directory exists
+          if [ ! -d "$NOTEBOOK_DIR" ]; then
+            echo "Directory $NOTEBOOK_DIR does not exist. Setting empty matrix."
+            echo "matrix=[]" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Find notebooks and handle empty results
+          if [ -z "$(find "$NOTEBOOK_DIR" -maxdepth 1 -name "*.py" 2>/dev/null)" ]; then
+            echo "No notebooks found in $NOTEBOOK_DIR. Setting empty matrix."
+            echo "matrix=[]" >> "$GITHUB_OUTPUT"
+          else
+            notebooks=$(find "$NOTEBOOK_DIR" -maxdepth 1 -name "*.py" -print0 | xargs -0 -n1 echo | jq -R -s -c 'split("\n")[:-1]')
+            echo "matrix=$notebooks" >> "$GITHUB_OUTPUT"
+          fi
+        shell: bash
+
+  # Create one job per notebook using the matrix strategy for parallel execution
+  test-notebooks:
+    if: needs.list-notebooks.outputs.notebook-list != '[]'
+    runs-on: ubuntu-latest
+    needs: list-notebooks
+    strategy:
+      matrix:
+        notebook: ${{ fromJson(needs.list-notebooks.outputs.notebook-list) }}
+      # Don't fail the entire workflow if one notebook fails
+      fail-fast: false
+    name: Run notebook ${{ matrix.notebook }}
+    steps:
+      # Check out the repository code
+      - uses: actions/checkout@v6.0.2
+        with:
+          lfs: true
+
+      # Install uv/uvx
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7.2.1
+        with:
+          version: "0.9.28"
+
+      - name: Configure git auth for private packages
+        uses: ./.github/actions/configure-git-auth
+        with:
+          token: ${{ secrets.GH_PAT }}
+
+      # Execute the notebook with the appropriate runner based on its content
+      - name: Run notebook
+        env:
+          UV_EXTRA_INDEX_URL: ${{ secrets.UV_EXTRA_INDEX_URL }}
+        run: |
+          uvx uv run "${{ matrix.notebook }}"
+          # uvx → creates a fresh ephemeral environment
+          # uv run → runs the notebook as a script in that ephemeral env
+          # No project packages are pre-installed
+          # ✅ This forces the notebook to explicitly handle dependencies (e.g., an inline "# /// script" metadata header, uv pip install ., or pip install inside the script).
+          # ✅ It’s a true integration smoke test.
+          # Benefits of this pattern
+          # Confirms the notebook can bootstrap itself in a fresh environment
+          # Catches missing dependency declarations or install steps early
+          # Ensures CI/other users can run the notebook without manual setup
+        shell: bash

From 61b7a14bc8334e0c7881275d41df5ad3bcd93d4a Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:41:16 +0400
Subject: [PATCH 06/13] Update rhiza_marimo.yml

---
 .github/workflows/rhiza_marimo.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/rhiza_marimo.yml b/.github/workflows/rhiza_marimo.yml
index b4f52c1b..c2d5238d 100644
--- a/.github/workflows/rhiza_marimo.yml
+++ b/.github/workflows/rhiza_marimo.yml
@@ -21,10 +21,7 @@ permissions:
   contents: read
 
 on:
-  push:
-    branches: [ main, master ]
-  pull_request:
-    branches: [ main, master ]
+  push
 
 jobs:
   # Build a matrix of notebooks to test

From 53002afcb36e8af5991da010d1a7c3539cbcef87 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:42:48 +0400
Subject: [PATCH 07/13] Remove unused Git authentication step from Marimo
 workflow

---
 .github/workflows/rhiza_marimo.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/rhiza_marimo.yml b/.github/workflows/rhiza_marimo.yml
index c2d5238d..4109d6c5 100644
--- a/.github/workflows/rhiza_marimo.yml
+++ b/.github/workflows/rhiza_marimo.yml
@@ -82,11 +82,6 @@ jobs:
         with:
           version: "0.9.28"
 
-      - name: Configure git auth for private packages
-        uses: ./.github/actions/configure-git-auth
-        with:
-          token: ${{ secrets.GH_PAT }}
-
       # Execute the notebook with the appropriate runner based on its content
       - name: Run notebook
         env:

From 243370436b8617f92376301dcb6504d5df9642bf Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:46:28 +0400
Subject: [PATCH 08/13] script header for uv run with dependencies and
 requirements

---
 marimo/Ch03-linreg-lab.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index 2d7baa4e..99dbdd0d 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -1,3 +1,13 @@
+# /// script
+# dependencies = [
+#   "marimo==0.19.6",
+#   "numpy==2.3.1",
+#   "plotly==6.2.0",
+#   "polars==1.32.2",
+# ]
+# requires-python = ">=3.12"
+# ///
+
 import marimo
 
 __generated_with = "0.19.7"

From 0a429f2c807ca17155f8b553decfde625395e575 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 07:50:47 +0400
Subject: [PATCH 09/13] script header for uv run with dependencies and
 requirements

---
 marimo/Ch03-linreg-lab.py | 98 +++++++++++++++++++--------------------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index 99dbdd0d..7d1bb74a 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -4,6 +4,8 @@
 #   "numpy==2.3.1",
 #   "plotly==6.2.0",
 #   "polars==1.32.2",
+#   "pandas",
+#   "matplotlib"
 # ]
 # requires-python = ">=3.12"
 # ///
@@ -13,15 +15,11 @@
 __generated_with = "0.19.7"
 app = marimo.App()
 
-
-@app.cell
-def _():
+with app.setup:
     import marimo as mo
-    return (mo,)
-
-
+    
 @app.cell
-def _(mo):
+def _():
     mo.md(r"""
     # Linear Regression
     """)
@@ -29,7 +27,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Importing packages
     We import our standard libraries at this top
@@ -47,7 +45,7 @@ def _():
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ### New imports
     Throughout this lab we will introduce new functions and libraries. However,
@@ -66,7 +64,7 @@ def _():
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We will provide relevant details about the
     functions below as they are needed.
@@ -89,7 +87,7 @@ def _():
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     As one of the import statements above is quite a long line, we inserted a line break `\` to
     ease readability.
@@ -110,7 +108,7 @@ def _():
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ### Inspecting Objects and Namespaces
     The
@@ -128,7 +126,7 @@ def _():
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     This shows you everything that `Python` can find at the top level.
     There are certain objects like `__builtins__` that contain references to built-in
@@ -151,7 +149,7 @@ def _(np):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     This indicates that the object `A.sum` exists. In this case it is a method
     that can be used to compute the sum of the array `A` as can be seen by typing `A.sum?`.
@@ -166,7 +164,7 @@ def _(A):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
  
     """)
@@ -174,7 +172,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Simple Linear Regression
     In this section we will  construct model
@@ -202,7 +200,7 @@ def _(load_data):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Type `Boston?` to find out more about these data.
 
@@ -223,7 +221,7 @@ def _(Boston, np, pd):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We extract the response, and fit the model.
     """)
@@ -239,7 +237,7 @@ def _(Boston, X, sm):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Note that `sm.OLS()` does
     not fit the model; it specifies the model, and then `model.fit()` does the actual fitting.
@@ -260,7 +258,7 @@ def _(results, summarize):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Before we describe other methods for working with fitted models, we outline a more useful and general framework for constructing a model matrix~`X`.
     ### Using Transformations: Fit and Transform
@@ -305,7 +303,7 @@ def _(Boston, MS):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     In this simple case, the `fit()`  method does very little; it simply checks that the variable `'lstat'` specified in `design` exists in `Boston`. Then `transform()` constructs the model matrix with two columns: an `intercept` and the variable `lstat`.
 
@@ -324,7 +322,7 @@ def _(Boston, MS):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Note that, as in the previous code chunk when the two steps were done separately, the `design` object is changed as a result of the `fit()` operation. The power of this pipeline will become clearer when we fit more complex models that involve interactions and transformations.
     """)
@@ -332,7 +330,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Let's return to our fitted regression model.
     The object
@@ -351,7 +349,7 @@ def _(results):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The fitted coefficients can also be retrieved as the
     `params` attribute of `results`.
@@ -366,7 +364,7 @@ def _(results):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The `get_prediction()`  method can be used to obtain predictions, and produce confidence intervals and
     prediction intervals for the prediction of  `medv`  for  given values of  `lstat`.
@@ -386,7 +384,7 @@ def _(design_1, pd):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Next we compute the predictions at `newX`, and view them by extracting the `predicted_mean` attribute.
     """)
@@ -401,7 +399,7 @@ def _(newX, results):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We can produce confidence intervals for the predicted values.
     """)
@@ -415,7 +413,7 @@ def _(new_predictions):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Prediction intervals are computed by setting `obs=True`:
     """)
@@ -429,7 +427,7 @@ def _(new_predictions):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     For instance, the 95% confidence interval associated with an
      `lstat`  value of 10 is (24.47, 25.63), and the 95% prediction
@@ -447,7 +445,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ### Defining Functions
     While there is a function
@@ -466,7 +464,7 @@ def abline(ax, b, m):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     A few things are illustrated above. First we see the syntax for defining a function:
     `def funcname(...)`. The function has arguments `ax, b, m`
@@ -486,7 +484,7 @@ def abline_1(ax, b, m, *args, **kwargs):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The addition of `*args` allows any number of
     non-named arguments to `abline`, while `**kwargs` allows any
@@ -511,7 +509,7 @@ def _(Boston, results):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Thus, the final call to `ax.plot()` is `ax.plot(xlim, ylim, 'r--', linewidth=3)`.
     We have used the argument `'r--'` to produce a red dashed line, and added
@@ -524,7 +522,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Next we examine some diagnostic plots, several of which were discussed
     in Section~\ref{Ch3:problems.sec}.
@@ -550,7 +548,7 @@ def _(results, subplots):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We add a horizontal line at 0 for reference using the
      `ax.axhline()`   method, indicating
@@ -576,7 +574,7 @@ def _(X_2, np, results, subplots):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The `np.argmax()`  function identifies the index of the largest element of an array, optionally computed over an axis of the array.
     In this case, we maximized over the entire array
@@ -586,7 +584,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Multiple Linear Regression
     In order to fit a multiple linear regression model using least squares, we again use
@@ -609,7 +607,7 @@ def _(Boston, MS, sm, summarize, y):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Notice how we have compacted the first line into a succinct expression describing the construction of `X`.
 
@@ -628,7 +626,7 @@ def _(Boston):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We can now fit the model with all the variables in `terms` using
     the same model matrix builder.
@@ -646,7 +644,7 @@ def _(Boston, MS, sm, summarize, terms, y):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     What if we would like to perform a regression using all of the variables but one?  For
     example, in the above regression output,   `age`  has a high $p$-value.
@@ -666,7 +664,7 @@ def _(Boston, MS, sm, summarize, y):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Multivariate Goodness of Fit
     We can access the individual components of `results` by name
@@ -705,7 +703,7 @@ def _(VIF, X_4, pd):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The function `VIF()` takes two arguments: a dataframe or array,
     and a variable column index. In the code above we call `VIF()` on the fly for all columns in `X`.
@@ -725,7 +723,7 @@ def _(VIF, X_4):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     List comprehension allows us to perform such repetitive operations in a more straightforward way.
     ## Interaction Terms
@@ -746,7 +744,7 @@ def _(Boston, MS, sm, summarize, y):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Non-linear Transformations of the Predictors
     The model matrix builder can include terms beyond
@@ -768,7 +766,7 @@ def _(Boston, MS, poly, sm, summarize, y):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The effectively zero *p*-value associated with the quadratic term
     (i.e. the third row above) suggests that it leads to an improved model.
@@ -798,7 +796,7 @@ def _(anova_lm, results1, results3):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     Here `results1` represents the linear submodel containing
     predictors `lstat` and `age`,
@@ -837,7 +835,7 @@ def _(results3, subplots):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     We see that when the quadratic term is included in the model,
     there is little discernible pattern in the residuals.
@@ -848,7 +846,7 @@ def _(mo):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     ## Qualitative Predictors
     Here we use the  `Carseats`  data, which is included in the
@@ -867,7 +865,7 @@ def _(load_data):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     The `Carseats`
      data includes qualitative predictors such as
@@ -897,7 +895,7 @@ def _(Carseats, MS, sm, summarize):
 
 
 @app.cell(hide_code=True)
-def _(mo):
+def _():
     mo.md(r"""
     In the first line above, we made `allvars` a list, so that we
     could add the interaction terms two lines down.

From 360e1eaf5f6c4833d8fcbf41921fc744db65d457 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 08:01:36 +0400
Subject: [PATCH 10/13] Install ISLP library

---
 marimo/Ch03-linreg-lab.py | 51 ++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index 7d1bb74a..6c9b1177 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -5,9 +5,13 @@
 #   "plotly==6.2.0",
 #   "polars==1.32.2",
 #   "pandas",
-#   "matplotlib"
+#   "matplotlib",
+#   "statsmodels",
+#   "ISLP"
 # ]
 # requires-python = ">=3.12"
+# [tool.uv.sources]
+#   ISLP = { git = "https://github.com/intro-stat-learning/ISLP_labs.git" }
 # ///
 
 import marimo
@@ -17,7 +21,10 @@
 
 with app.setup:
     import marimo as mo
-    
+    import numpy as np
+    import pandas as pd
+    import statsmodels.api as sm
+
 @app.cell
 def _():
     mo.md(r"""
@@ -38,10 +45,10 @@ def _():
 
 @app.cell
 def _():
-    import numpy as np
-    import pandas as pd
+    #import numpy as np
+    #import pandas as pd
     from matplotlib.pyplot import subplots
-    return np, pd, subplots
+    return subplots
 
 
 @app.cell(hide_code=True)
@@ -57,12 +64,6 @@ def _():
     return
 
 
-@app.cell
-def _():
-    import statsmodels.api as sm
-    return (sm,)
-
-
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
@@ -142,7 +143,7 @@ def _():
 
 
 @app.cell
-def _(np):
+def _():
     A = np.array([3,5,11])
     dir(A)
     return (A,)
@@ -213,7 +214,7 @@ def _():
 
 
 @app.cell
-def _(Boston, np, pd):
+def _(Boston):
     X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]),
                       'lstat': Boston['lstat']})
     X[:4]
@@ -229,7 +230,7 @@ def _():
 
 
 @app.cell
-def _(Boston, X, sm):
+def _(Boston, X):
     y = Boston['medv']
     _model = sm.OLS(y, X)
     results = _model.fit()
@@ -376,7 +377,7 @@ def _():
 
 
 @app.cell
-def _(design_1, pd):
+def _(design_1):
     new_df = pd.DataFrame({'lstat': [5, 10, 15]})
     newX = design_1.transform(new_df)
     newX
@@ -458,9 +459,9 @@ def _():
 @app.function
 def abline(ax, b, m):
     """Add a line with slope m and intercept b to ax"""
-    xlim = _ax.get_xlim()
+    xlim = ax.get_xlim()
     ylim = [m * xlim[0] + b, m * xlim[1] + b]
-    _ax.plot(xlim, ylim)
+    ax.plot(xlim, ylim)
 
 
 @app.cell(hide_code=True)
@@ -563,7 +564,7 @@ def _():
 
 
 @app.cell
-def _(X_2, np, results, subplots):
+def _(X_2, results, subplots):
     infl = results.get_influence()
     _ax = subplots(figsize=(8, 8))[1]
     _ax.scatter(np.arange(X_2.shape[0]), infl.hat_matrix_diag)
@@ -598,7 +599,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, sm, summarize, y):
+def _(Boston, MS, summarize, y):
     X_3 = MS(['lstat', 'age']).fit_transform(Boston)
     _model1 = sm.OLS(y, X_3)
     results1 = _model1.fit()
@@ -635,7 +636,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, sm, summarize, terms, y):
+def _(Boston, MS, summarize, terms, y):
     X_4 = MS(terms).fit_transform(Boston)
     _model = sm.OLS(y, X_4)
     results_1 = _model.fit()
@@ -655,7 +656,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, sm, summarize, y):
+def _(Boston, MS, summarize, y):
     minus_age = Boston.columns.drop(['medv', 'age'])
     Xma = MS(minus_age).fit_transform(Boston)
     _model1 = sm.OLS(y, Xma)
@@ -695,7 +696,7 @@ def _():
 
 
 @app.cell
-def _(VIF, X_4, pd):
+def _(VIF, X_4):
     _vals = [VIF(X_4, i) for i in range(1, X_4.shape[1])]
     vif = pd.DataFrame({'vif': _vals}, index=X_4.columns[1:])
     vif
@@ -736,7 +737,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, sm, summarize, y):
+def _(Boston, MS, summarize, y):
     X_5 = MS(['lstat', 'age', ('lstat', 'age')]).fit_transform(Boston)
     model2 = sm.OLS(y, X_5)
     summarize(model2.fit())
@@ -757,7 +758,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, poly, sm, summarize, y):
+def _(Boston, MS, poly, summarize, y):
     X_6 = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)
     model3 = sm.OLS(y, X_6)
     results3 = model3.fit()
@@ -884,7 +885,7 @@ def _():
 
 
 @app.cell
-def _(Carseats, MS, sm, summarize):
+def _(Carseats, MS, summarize):
     allvars = list(Carseats.columns.drop('Sales'))
     y_1 = Carseats['Sales']
     final = allvars + [('Income', 'Advertising'), ('Price', 'Age')]

From 8cd0096d5881adb4a46eb49e52d7c8e338e3b7d2 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 08:12:17 +0400
Subject: [PATCH 11/13] Install ISLP library

---
 marimo/Ch03-linreg-lab.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index 6c9b1177..bf019f9f 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -10,13 +10,11 @@
 #   "ISLP"
 # ]
 # requires-python = ">=3.12"
-# [tool.uv.sources]
-#   ISLP = { git = "https://github.com/intro-stat-learning/ISLP_labs.git" }
 # ///
 
 import marimo
 
-__generated_with = "0.19.7"
+__generated_with = "0.19.6"
 app = marimo.App()
 
 with app.setup:
@@ -25,6 +23,7 @@
     import pandas as pd
     import statsmodels.api as sm
 
+
 @app.cell
 def _():
     mo.md(r"""
@@ -48,7 +47,7 @@ def _():
     #import numpy as np
     #import pandas as pd
     from matplotlib.pyplot import subplots
-    return subplots
+    return (subplots,)
 
 
 @app.cell(hide_code=True)

From 5bf79e5de9324728c1d51af94e5275012dd7048b Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 08:16:51 +0400
Subject: [PATCH 12/13] Remove unused imports and streamline function arguments
 in Ch03-linreg-lab.py

---
 marimo/Ch03-linreg-lab.py | 73 ++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 44 deletions(-)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index bf019f9f..f5623e20 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -22,8 +22,18 @@
     import numpy as np
     import pandas as pd
     import statsmodels.api as sm
-
-
+    
+    # plotting
+    from matplotlib.pyplot import subplots
+    
+    from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
+    from statsmodels.stats.anova import anova_lm
+    
+    from ISLP import load_data
+    from ISLP.models import (ModelSpec as MS,
+                             summarize,
+                             poly)
+    
 @app.cell
 def _():
     mo.md(r"""
@@ -42,14 +52,6 @@ def _():
     return
 
 
-@app.cell
-def _():
-    #import numpy as np
-    #import pandas as pd
-    from matplotlib.pyplot import subplots
-    return (subplots,)
-
-
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
@@ -78,14 +80,6 @@ def _():
     return
 
 
-@app.cell
-def _():
-    from statsmodels.stats.outliers_influence \
-         import variance_inflation_factor as VIF
-    from statsmodels.stats.anova import anova_lm
-    return VIF, anova_lm
-
-
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
@@ -98,15 +92,6 @@ def _():
     return
 
 
-@app.cell
-def _():
-    from ISLP import load_data
-    from ISLP.models import (ModelSpec as MS,
-                             summarize,
-                             poly)
-    return MS, load_data, poly, summarize
-
-
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
@@ -193,7 +178,7 @@ def _():
 
 
 @app.cell
-def _(load_data):
+def _():
     Boston = load_data("Boston")
     Boston.columns
     return (Boston,)
@@ -252,7 +237,7 @@ def _():
 
 
 @app.cell
-def _(results, summarize):
+def _(results):
     summarize(results)
     return
 
@@ -294,7 +279,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS):
+def _(Boston):
     design = MS(['lstat'])
     design = design.fit(Boston)
     X_1 = design.transform(Boston)
@@ -314,7 +299,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS):
+def _(Boston):
     design_1 = MS(['lstat'])
     X_2 = design_1.fit_transform(Boston)
     X_2[:4]
@@ -538,7 +523,7 @@ def _():
 
 
 @app.cell
-def _(results, subplots):
+def _(results):
     _ax = subplots(figsize=(8, 8))[1]
     _ax.scatter(results.fittedvalues, results.resid)
     _ax.set_xlabel('Fitted value')
@@ -563,7 +548,7 @@ def _():
 
 
 @app.cell
-def _(X_2, results, subplots):
+def _(X_2, results):
     infl = results.get_influence()
     _ax = subplots(figsize=(8, 8))[1]
     _ax.scatter(np.arange(X_2.shape[0]), infl.hat_matrix_diag)
@@ -598,7 +583,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, summarize, y):
+def _(Boston, y):
     X_3 = MS(['lstat', 'age']).fit_transform(Boston)
     _model1 = sm.OLS(y, X_3)
     results1 = _model1.fit()
@@ -635,7 +620,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, summarize, terms, y):
+def _(Boston, terms, y):
     X_4 = MS(terms).fit_transform(Boston)
     _model = sm.OLS(y, X_4)
     results_1 = _model.fit()
@@ -655,7 +640,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, summarize, y):
+def _(Boston, y):
     minus_age = Boston.columns.drop(['medv', 'age'])
     Xma = MS(minus_age).fit_transform(Boston)
     _model1 = sm.OLS(y, Xma)
@@ -695,7 +680,7 @@ def _():
 
 
 @app.cell
-def _(VIF, X_4):
+def _(X_4):
     _vals = [VIF(X_4, i) for i in range(1, X_4.shape[1])]
     vif = pd.DataFrame({'vif': _vals}, index=X_4.columns[1:])
     vif
@@ -715,7 +700,7 @@ def _():
 
 
 @app.cell
-def _(VIF, X_4):
+def _(X_4):
     _vals = []
     for i in range(1, X_4.values.shape[1]):
         _vals.append(VIF(X_4.values, i))
@@ -736,7 +721,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, summarize, y):
+def _(Boston, y):
     X_5 = MS(['lstat', 'age', ('lstat', 'age')]).fit_transform(Boston)
     model2 = sm.OLS(y, X_5)
     summarize(model2.fit())
@@ -757,7 +742,7 @@ def _():
 
 
 @app.cell
-def _(Boston, MS, poly, summarize, y):
+def _(Boston, y):
     X_6 = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)
     model3 = sm.OLS(y, X_6)
     results3 = model3.fit()
@@ -790,7 +775,7 @@ def _():
 
 
 @app.cell
-def _(anova_lm, results1, results3):
+def _(results1, results3):
     anova_lm(results1, results3)
     return
 
@@ -825,7 +810,7 @@ def _():
 
 
 @app.cell
-def _(results3, subplots):
+def _(results3):
     _ax = subplots(figsize=(8, 8))[1]
     _ax.scatter(results3.fittedvalues, results3.resid)
     _ax.set_xlabel('Fitted value')
@@ -858,7 +843,7 @@ def _():
 
 
 @app.cell
-def _(load_data):
+def _():
     Carseats = load_data('Carseats')
     Carseats.columns
     return (Carseats,)
@@ -884,7 +869,7 @@ def _():
 
 
 @app.cell
-def _(Carseats, MS, summarize):
+def _(Carseats):
     allvars = list(Carseats.columns.drop('Sales'))
     y_1 = Carseats['Sales']
     final = allvars + [('Income', 'Advertising'), ('Price', 'Age')]

From 29a7f7e3c7b186b90512378591ffe622eee16f18 Mon Sep 17 00:00:00 2001
From: Thomas Schmelzer <thomas.schmelzer@gmail.com>
Date: Wed, 4 Feb 2026 08:31:30 +0400
Subject: [PATCH 13/13] Remove unused dependencies and reorder imports in
 Ch03-linreg-lab.py

---
 marimo/Ch03-linreg-lab.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/marimo/Ch03-linreg-lab.py b/marimo/Ch03-linreg-lab.py
index f5623e20..4a7c6687 100644
--- a/marimo/Ch03-linreg-lab.py
+++ b/marimo/Ch03-linreg-lab.py
@@ -2,8 +2,6 @@
 # dependencies = [
 #   "marimo==0.19.6",
 #   "numpy==2.3.1",
-#   "plotly==6.2.0",
-#   "polars==1.32.2",
 #   "pandas",
 #   "matplotlib",
 #   "statsmodels",
@@ -21,11 +19,10 @@
     import marimo as mo
     import numpy as np
     import pandas as pd
-    import statsmodels.api as sm
-    
-    # plotting
+
     from matplotlib.pyplot import subplots
-    
+
+    import statsmodels.api as sm
     from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
     from statsmodels.stats.anova import anova_lm