From 970c0046de57251d7d620f2bc63a30e763d1f285 Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Mon, 8 Sep 2025 14:58:41 -0400 Subject: [PATCH 1/7] Inital narwhals implementation --- environment.yml | 1 + pyproject.toml | 1 + requirements.txt | 3 ++- statstables/tables.py | 34 +++++++++++++++++++--------------- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/environment.yml b/environment.yml index 40b1fdc..2753e14 100644 --- a/environment.yml +++ b/environment.yml @@ -14,4 +14,5 @@ dependencies: - linearmodels - unicodeit - Faker + - narwhals \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 9dbed53..71f9ddd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "statsmodels", "linearmodels", "unicodeit", + "narwhals" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 17d43bc..d39f13c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ scipy statsmodels linearmodels unicodeit -Faker \ No newline at end of file +Faker +narwhals \ No newline at end of file diff --git a/statstables/tables.py b/statstables/tables.py index cc4bc86..0a5f233 100644 --- a/statstables/tables.py +++ b/statstables/tables.py @@ -3,6 +3,8 @@ import pandas as pd import numpy as np import statstables as st +import narwhals as nw +from narwhals.typing import IntoDataFrame from abc import ABC, abstractmethod from scipy import stats from typing import Union, Callable @@ -749,11 +751,11 @@ class GenericTable(Table): column/index naming """ - def __init__(self, df: pd.DataFrame | pd.Series, **kwargs): - self.df = df - self.ncolumns = df.shape[1] - self.columns = df.columns - self.nrows = df.shape[0] + def __init__(self, df: IntoDataFrame, **kwargs): + self.df = nw.from_native(df).to_pandas() + self.ncolumns = self.df.shape[1] + self.columns = self.df.columns + self.nrows = self.df.shape[0] super().__init__(**kwargs) def reset_params(self, restore_to_defaults=False): @@ -785,7 +787,7 @@ def _create_rows(self): class MeanDifferenceTable(Table): def __init__( self, - df: pd.DataFrame, + df: IntoDataFrame, var_list: list, group_var: str, diff_pairs: list[tuple] | None = None, @@ -818,7 +820,7 @@ def __init__( Parameters ---------- - df : pd.DataFrame + df : IntoDataFrame DataFrame containing the raw data to be compared var_list : list List of variables to compare means to between the groups @@ -851,7 +853,8 @@ def __init__( } self.table_params = MeanDiffsTableParams(user_params) # TODO: allow for grouping on multiple variables - self.groups = df[group_var].unique() + self.df = nw.from_native(df).to_pandas() + self.groups = self.df[group_var].unique() self.ngroups = len(self.groups) self.var_list = var_list if self.ngroups > 2 and not diff_pairs: @@ -861,14 +864,14 @@ def __init__( if self.ngroups < 2: raise ValueError("There must be at least two groups") self.alternative = alternative - self.type_gdf = df.groupby(group_var) + self.type_gdf = self.df.groupby(group_var) # adjust these to only count non-null values self.grp_sizes = self.type_gdf.size() - self.grp_sizes["Overall Mean"] = df.shape[0] + self.grp_sizes["Overall Mean"] = self.df.shape[0] self.means = self.type_gdf[var_list].mean().T # add toal means column to means - self.means["Overall Mean"] = df[var_list].mean() - total_sem = df[var_list].sem() + self.means["Overall Mean"] = self.df[var_list].mean() + total_sem = self.df[var_list].sem() total_sem.name = "Overall Mean" self.sem = pd.merge( self.type_gdf[var_list].sem().T, @@ -1067,10 +1070,11 @@ def _create_rows(self): class SummaryTable(GenericTable): - def __init__(self, df: pd.DataFrame, var_list: list[str] | None = None, **kwargs): + def __init__(self, df: IntoDataFrame, var_list: list[str] | None = None, **kwargs): + self.df = nw.from_native(df).to_pandas() if var_list is None: - var_list = df.columns - summary_df = df[var_list].describe() + var_list = self.df.columns + summary_df = self.df[var_list].describe() super().__init__(summary_df, **kwargs) # self.reset_custom_features() From a6b0aea16f3c534c1fc86d1feab1d5576226ce2d Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Wed, 5 Nov 2025 10:50:04 -0500 Subject: [PATCH 2/7] add missing comma to pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e2323a0..9c76f63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ test = [ "statsmodels", "linearmodels", "unicodeit", - "narwhals" + "narwhals", "pyfixest", "faker", ] From aef1ded0f69687ba236e57f7793f5650ca1a223b Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Wed, 5 Nov 2025 11:09:01 -0500 Subject: [PATCH 3/7] working polars test --- pyproject.toml | 2 ++ statstables/tests/test_tables.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9c76f63..6835195 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,8 @@ test = [ "narwhals", "pyfixest", "faker", + "polars", + "pyarrow", ] dev = [ diff --git a/statstables/tests/test_tables.py b/statstables/tests/test_tables.py index 07e7146..58203f6 100644 --- a/statstables/tests/test_tables.py +++ b/statstables/tests/test_tables.py @@ -5,6 +5,7 @@ import copy import pytest import pandas as pd +import polars as pl import numpy as np import statsmodels.formula.api as smf import pyfixest as pf @@ -52,6 +53,14 @@ def test_generic_table(data): table = tables.GenericTable(df) print(table) + # test with polars dataframe to test narwhals implementation + pl_data = pl.from_pandas(data) + table = tables.GenericTable(df=pl_data) + + table.render_ascii() + table.render_html() + table.render_latex() + def test_summary_table(data): table = tables.SummaryTable(df=data, var_list=["A", "B", "C"]) From 2b15d7aa6a5537259726235ecf6397eccf599d95 Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Thu, 6 Nov 2025 15:04:00 -0500 Subject: [PATCH 4/7] update testing dependencies; also test against python 3.14 --- .github/workflows/python-package.yml | 2 +- requirements.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index fa4532d..0e60247 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 diff --git a/requirements.txt b/requirements.txt index 6e6a097..463a6a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ scipy unicodeit Faker narwhals +polars +pyarrow \ No newline at end of file From 1fd420cc10414e2f506e54aa0a0d84236f6792c4 Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Tue, 9 Dec 2025 14:50:52 -0500 Subject: [PATCH 5/7] bump scipy version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6835195..5ab2456 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ dependencies = [ "numpy", "pandas>=2", - "scipy", + "scipy>=1.6.1", "unicodeit", ] From 1bb624dd4a7beb86898e519dd862011b4b14bbf7 Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Tue, 9 Dec 2025 15:06:37 -0500 Subject: [PATCH 6/7] version bumps --- main.pdf | Bin 95462 -> 95460 bytes pyfixest_tables.tex | 6 +++--- pyproject.toml | 5 +++-- samplenotebook.ipynb | 20 +++++++++++--------- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/main.pdf b/main.pdf index df3530c059e41f1d2d22572ff2a53d323290185d..d0084941d3126f5d6f1c9bd36f8e09b2d2906c7f 100644 GIT binary patch delta 622 zcmaF%lJ&_;)(xF9OeU6-7jkNB-YjF!T)#=%O)~pe`Llfr)A)63or{!}RrVgW&G0^8 zCc$Icb8Fwydo7$B_InCExqjwrgIeR}_%mORK3x^_Z|cH_M`ul1J8PDo`_h;PiWZB) zKg@ggLf$86^Xv)MdEbnVN<4EZ_-L>*u~ABPhTtywZmCOa79VEUIc@%E$p)i0FF6-Y zc)@>7GE%nmNr@xdjaQuujbd(9*39PNFn89BcDQ=`)OXDk?z5M-Ykm_n+4O!zb}c zdkWj{DKPH$)G;!!G&D6ZGd9vSFjY4&P}k(r_svgnNi0cK&~ULbGB7eVge%#8GK+B$ zN05`HlYya|lewF@nT4U5vAMH_iL0B5lckxHfs=)qv4Nd}4M8QbT>3t#dFdq?3Py$& z3ZPK0mp!%DtEo`t=*ME6;OfdPCR;a|yp4_7-8k)7i<1CPyTS{}y&W&CN=i;|Ja}8+ z)*;=1jI34vEl;<8aiuHp@TE(LZD>rtd3ZRNye;QdM>JcjE#8^!NtG delta 623 zcmaFzlJ(h3)(xF9jHZ(paB6PeEMw1HzfIdsGW%D#)bFMUyDeXMPxkgq_#*awfiC0i z1cNIES-+=M39Fs>>)WKTKW!bm3Fn-@N$d3W!k>Ox&%>*KZA#A7Dpu+Z? zZQb3>pn6Z4&spfWPHmbz6KC|k8}a4Oowq4(ah|qVP)w-&&bp=ln0+oEl$@L>Biwvf zVf$SL#{Hf;h6bjF21e#aX1WHZ>IMetnq2z6`6(`mC8-J;E>=bcMy7^vCEHJAF)rc= zGIKGvbhdCZGBa~^G&DCeuyAp)a5FM>G&FNEHnns#uv4%hs3ew4-zPOMy(B}y$k0+@ z@m0vmcyFYRfafxydF%r8~RAI>}tow_SJrWzQx>5F)4gIVY>c0wwt>cZr1U{^fR01H@>TW rP$ss+pNYS`&Q3HvWxr_IVWoeJNeQw&)Ay7xDsUKcsj9mAyKw;k=2", - "scipy>=1.6.1", + "scipy", "unicodeit", ] @@ -46,7 +47,7 @@ test = [ "linearmodels", "unicodeit", "narwhals", - "pyfixest", + "pyfixest>=0.40.1", "faker", "polars", "pyarrow", diff --git a/samplenotebook.ipynb b/samplenotebook.ipynb index 0d437c7..0f83c2e 100644 --- a/samplenotebook.ipynb +++ b/samplenotebook.ipynb @@ -69,7 +69,7 @@ "type": "integer" } ], - "ref": "1f4fb5a8-29a3-4f4a-b861-1b2cd019cfd6", + "ref": "dabae8ec-9941-4705-bacb-d4968ec32494", "rows": [ [ "0", @@ -3523,7 +3523,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" + "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n", + "/Users/andersonfrailey/opt/anaconda3/envs/statstables-dev/lib/python3.13/site-packages/pyfixest/estimation/model_matrix_fixest_.py:215: UserWarning: 2 singleton fixed effect(s) detected. These observations are dropped from the model.\n", + " warnings.warn(\n" ] }, { @@ -3553,9 +3555,9 @@ " \n", " \n", "\n", - " (0.066)\n", + " (0.060)\n", "\n", - " (0.035)\n", + " (0.042)\n", "\n", " \n", " \n", @@ -3571,7 +3573,7 @@ "\n", " \n", "\n", - " (0.010)\n", + " (0.011)\n", "\n", " \n", " \n", @@ -3582,7 +3584,7 @@ "\n", " 997\n", "\n", - " 997\n", + " 995\n", "\n", " \n", " \n", @@ -3604,11 +3606,11 @@ "+ (1) (2) +\n", "+------------------------------------------------+\n", "+ X1 -0.919*** -0.007 +\n", - "+ (0.066) (0.035) +\n", + "+ (0.060) (0.042) +\n", "+ X2 -0.015 +\n", - "+ (0.010) +\n", + "+ (0.011) +\n", "--------------------------------------------------\n", - "+ Observations 997 997 +\n", + "+ Observations 997 995 +\n", "+ R² 0.609 +\n", "--------------------------------------------------\n", "*p<0.1, **p<0.05, ***p<0.01 " From 24105042a6b4f4c6751753404e2cc81186e6806a Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Tue, 9 Dec 2025 15:10:31 -0500 Subject: [PATCH 7/7] remove official 3.14 support --- .github/workflows/python-package.yml | 2 +- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0e60247..fa4532d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v3 diff --git a/pyproject.toml b/pyproject.toml index fa5feed..b201b09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,6 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Development Status :: 2 - Pre-Alpha" ]