diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9bf4aa1..fa4532d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,13 +5,12 @@ name: Python package on: push: - branches: [ "main" ] + branches: ["main"] pull_request: - branches: [ "main" ] + branches: ["main"] jobs: build: - runs-on: ubuntu-latest strategy: fail-fast: false @@ -19,22 +18,23 @@ jobs: python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f test_requirements.txt ]; then pip install -r test_requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/ivtable.tex b/ivtable.tex index 5b2153c..bb0e420 100644 --- a/ivtable.tex +++ b/ivtable.tex @@ -21,7 +21,6 @@ Observations & 428 & 428 & 428 \\ $R^2$ & 0.118 & 0.173 & 0.093 \\ F Statistic & 57.196*** & 89.258*** & 2.849* \\ - Model & OLS & OLS & IV-2SLS \\ \bottomrule \multicolumn{4}{l}{{\small \textit{*p$<$0.1, **p$<$0.05, ***p$<$0.01}}}\\ \end{tabular} diff --git a/main.pdf b/main.pdf index 7b94e28..df3530c 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/main.tex b/main.tex index adf2fb7..8b53b4e 100644 --- a/main.tex +++ b/main.tex @@ -33,6 +33,12 @@ \input{panel.tex} \end{table} +\begin{table} + \centering + \caption{\texttt{pyfixest} Table} + \input{pyfixest_tables.tex} +\end{table} + \pagebreak \input{longtable.tex} diff --git a/pyfixest_tables.tex b/pyfixest_tables.tex new file mode 100644 index 0000000..8ffd3e0 --- /dev/null +++ b/pyfixest_tables.tex @@ -0,0 +1,22 @@ +% You must add \usepackage{booktabs} to your LaTex document for table to compile. +% If you use color in your formatting, you must also add \usepackage{xcolor} to the preamble. + +% If you are making a longtable, you must add \usepackage{longtable} to the preamble. 
+ +\begin{tabular}{lcc} + \toprule + \toprule + & \multicolumn{2}{c}{Dependent Variable: Y} \\ + \cmidrule(lr){2-3} + & (1) & (2)\\ + \midrule + X1 & -0.919*** & -0.007 \\ + & (0.066) & (0.035) \\ + X2 & & -0.015 \\ + & & (0.010) \\ + \midrule + Observations & 997 & 997 \\ + $R^2$ & 0.609 & \\ + \bottomrule + \multicolumn{3}{l}{{\small \textit{*p$<$0.1, **p$<$0.05, ***p$<$0.01}}}\\ +\end{tabular} diff --git a/pyproject.toml b/pyproject.toml index 9dbed53..96b182a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,21 @@ dependencies = [ "numpy", "pandas>=2", "scipy", + "unicodeit", +] + +[project.optional-dependencies] +test = [ + "pytest>=6.0", + "pytest-cov>=2.0", "statsmodels", "linearmodels", - "unicodeit", + "pyfixest", + "faker", +] + +dev = [ + "statstables[test]" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 17d43bc..08a9e16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ numpy pandas scipy -statsmodels -linearmodels unicodeit -Faker \ No newline at end of file diff --git a/samplenotebook.ipynb b/samplenotebook.ipynb index e341b1e..0d437c7 100644 --- a/samplenotebook.ipynb +++ b/samplenotebook.ipynb @@ -1,3545 +1,3663 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import statstables as st\n", - "from statstables import tables\n", - "from pathlib import Path\n", - "from faker import Faker" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "np.random.seed(5410)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "A", - "rawType": "float64", - "type": "float" 
- }, - { - "name": "B", - "rawType": "float64", - "type": "float" - }, - { - "name": "C", - "rawType": "float64", - "type": "float" - }, - { - "name": "group", - "rawType": "object", - "type": "string" - }, - { - "name": "binary", - "rawType": "int64", - "type": "integer" - } + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import statstables as st\n", + "from statstables import tables\n", + "from pathlib import Path\n", + "from faker import Faker" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "np.random.seed(5410)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "A", + "rawType": "float64", + "type": "float" + }, + { + "name": "B", + "rawType": "float64", + "type": "float" + }, + { + "name": "C", + "rawType": "float64", + "type": "float" + }, + { + "name": "group", + "rawType": "object", + "type": "string" + }, + { + "name": "binary", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "1f4fb5a8-29a3-4f4a-b861-1b2cd019cfd6", + "rows": [ + [ + "0", + "-1.326374839207587", + "1.5517080238172218", + "-0.6779442115297923", + "X", + "0" + ], + [ + "1", + "-0.4855014911590015", + "-1.6752278648482863", + "0.7029360254882143", + "X", + "0" + ], + [ + "2", + "0.15854401045608701", + "-2.0374028524818484", + "-0.6588901384541379", + "X", + "0" + ], + [ + "3", + "0.6144153695899064", + "0.5720661339871327", + "-0.41524628910099726", + "X", + "1" + ], + [ + "4", + "-0.22945697966169146", + "-0.5443314119709048", + "0.9058529965429116", + "X", + "0" + ], + [ + "5", + "1.0218027157053844", + "2.319224741274596", + 
"-1.4633882228990143", + "X", + "1" + ], + [ + "6", + "-1.6534774140963884", + "1.0565307111567663", + "0.09874353742518119", + "X", + "1" + ], + [ + "7", + "-1.2851359202811061", + "0.36034746199076184", + "-0.6071031092237691", + "X", + "0" + ], + [ + "8", + "1.00178310570641", + "0.18487795842319749", + "-0.9619610448171395", + "X", + "0" + ], + [ + "9", + "2.138406992327448", + "0.8158131680238252", + "-1.121991974191712", + "X", + "1" + ], + [ + "10", + "-0.041020995857143104", + "-0.345976858342248", + "-0.06781507695173056", + "X", + "0" + ], + [ + "11", + "-1.0191792128268418", + "-1.3623899746524384", + "-0.4018779636000747", + "X", + "0" + ], + [ + "12", + "-1.5780440132393516", + "-1.4758170637977432", + "-0.9204884657850502", + "X", + "0" + ], + [ + "13", + "-2.7821184227141362", + "1.1661215555211222", + "-0.22903931136584582", + "X", + "0" + ], + [ + "14", + "0.612444485087259", + "-1.9522004724356903", + "1.207819918929549", + "X", + "0" + ], + [ + "15", + "-1.12542077250989", + "-0.4865318442699179", + "-0.5079821633214079", + "X", + "0" + ], + [ + "16", + "1.3881469695585034", + "-0.6743886022897816", + "-0.49441724565810297", + "X", + "0" + ], + [ + "17", + "0.3846769833383076", + "0.5494485914990322", + "-0.874435697972917", + "X", + "0" + ], + [ + "18", + "-0.6382852947258157", + "0.960849394573923", + "-0.8266610963220111", + "X", + "1" + ], + [ + "19", + "0.09891398676439228", + "0.33274285719365226", + "-0.7282369703709443", + "X", + "0" + ], + [ + "20", + "-0.21220271424390932", + "0.30085882729636454", + "-1.410648226501225", + "X", + "0" + ], + [ + "21", + "-0.08278421444838444", + "0.8531716602166252", + "0.22445249026168654", + "X", + "0" + ], + [ + "22", + "0.5309432936590944", + "-1.0960913094021334", + "-0.1891808692956873", + "X", + "0" + ], + [ + "23", + "-1.2293589424460318", + "0.2812838498470437", + "-1.4802271854765883", + "X", + "0" + ], + [ + "24", + "-0.188486153185734", + "1.1343977926097972", + "-0.749516964627805", + "X", 
+ "0" + ], + [ + "25", + "1.3106347359496997", + "0.8138481160482607", + "-0.7200817709168441", + "X", + "1" + ], + [ + "26", + "0.34204700942623045", + "2.9508871835399986", + "0.41977147475509236", + "X", + "0" + ], + [ + "27", + "0.8741597082079612", + "-1.0773216841925883", + "-0.995988015219408", + "X", + "1" + ], + [ + "28", + "1.9076741268422726", + "0.8726540928378016", + "0.5599070378193184", + "X", + "1" + ], + [ + "29", + "0.4280244542935084", + "1.1332058243521241", + "0.6861015391597531", + "X", + "0" + ], + [ + "30", + "-0.07074245324882782", + "0.9073428058637397", + "0.23465627018758284", + "X", + "0" + ], + [ + "31", + "-0.2005748640289552", + "-0.38594841356108306", + "0.3592344723810465", + "X", + "0" + ], + [ + "32", + "-0.7622620547215659", + "-0.17348227732753893", + "0.8731956414013242", + "X", + "0" + ], + [ + "33", + "-0.05289315952085553", + "0.5673926824088147", + "-0.9892282636172631", + "X", + "0" + ], + [ + "34", + "-0.6626650103175459", + "-0.8570943948110474", + "-0.02233670618972126", + "X", + "1" + ], + [ + "35", + "-1.1474933334641009", + "0.505438661935886", + "-0.5433418885255469", + "X", + "0" + ], + [ + "36", + "-0.5841443544432646", + "0.8618696026574305", + "-0.6850261091048194", + "X", + "0" + ], + [ + "37", + "-0.6093540570131564", + "0.26240798010125793", + "-0.19425845748858184", + "X", + "0" + ], + [ + "38", + "0.3634884000557077", + "0.345066728509237", + "0.6262002701728635", + "X", + "0" + ], + [ + "39", + "-0.9226668426854373", + "1.5831474835534", + "0.7478804194993391", + "X", + "0" + ], + [ + "40", + "0.42231480313163766", + "-0.06740675699875796", + "-0.3524079913423599", + "X", + "0" + ], + [ + "41", + "-0.004549436134780775", + "-0.09569745449085118", + "0.268451733909938", + "X", + "0" + ], + [ + "42", + "0.8915648160884971", + "-0.1473205948055779", + "0.33760125161512755", + "X", + "1" + ], + [ + "43", + "2.816976093400371", + "-1.258785583396147", + "0.33164346064316313", + "X", + "0" + ], + [ + "44", + 
"-0.5581139854121363", + "1.0664144307134953", + "0.8943885471344221", + "X", + "0" + ], + [ + "45", + "-0.025765332962440927", + "-1.0539201156859819", + "0.48726199769554035", + "X", + "0" + ], + [ + "46", + "0.04648735293223192", + "1.0964111222283048", + "2.7220365467358634", + "X", + "0" + ], + [ + "47", + "0.5252086141014584", + "1.307864542164725", + "0.8367160166732635", + "X", + "1" + ], + [ + "48", + "0.20646376321873006", + "-0.3997440672940264", + "1.2618602613123642", + "X", + "0" + ], + [ + "49", + "-1.6161032559576074", + "1.8243597442427602", + "1.6204498314617477", + "X", + "0" + ] + ], + "shape": { + "columns": 5, + "rows": 300 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCgroupbinary
0-1.3263751.551708-0.677944X0
1-0.485501-1.6752280.702936X0
20.158544-2.037403-0.658890X0
30.6144150.572066-0.415246X1
4-0.229457-0.5443310.905853X0
..................
950.562179-0.9677571.226888Z1
961.137781-0.7035741.129964Z1
97-0.9375131.7473370.907810Z1
980.7031331.2066231.129779Z0
991.042202-0.056653-0.047265Z1
\n", + "

300 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A B C group binary\n", + "0 -1.326375 1.551708 -0.677944 X 0\n", + "1 -0.485501 -1.675228 0.702936 X 0\n", + "2 0.158544 -2.037403 -0.658890 X 0\n", + "3 0.614415 0.572066 -0.415246 X 1\n", + "4 -0.229457 -0.544331 0.905853 X 0\n", + ".. ... ... ... ... ...\n", + "95 0.562179 -0.967757 1.226888 Z 1\n", + "96 1.137781 -0.703574 1.129964 Z 1\n", + "97 -0.937513 1.747337 0.907810 Z 1\n", + "98 0.703133 1.206623 1.129779 Z 0\n", + "99 1.042202 -0.056653 -0.047265 Z 1\n", + "\n", + "[300 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat(\n", + " [\n", + " pd.DataFrame(\n", + " {\n", + " \"A\": np.random.normal(0, 1, 100),\n", + " \"B\": np.random.normal(0, 1, 100),\n", + " \"C\": np.random.normal(0, 1, 100),\n", + " \"group\": [\"X\"] * 100,\n", + " \"binary\": np.random.choice([0, 1], 100, p=[0.7, 0.3]),\n", + " }\n", + " ),\n", + " pd.DataFrame(\n", + " {\n", + " \"A\": np.random.normal(0, 1, 100),\n", + " \"B\": np.random.normal(5, 2, 100),\n", + " \"C\": np.random.normal(1, 1, 100),\n", + " \"group\": [\"Y\"] * 100,\n", + " \"binary\": np.random.choice([0, 1], 100, p=[0.3, 0.7]),\n", + " }\n", + " ),\n", + " pd.DataFrame(\n", + " {\n", + " \"A\": np.random.normal(0, 1, 100),\n", + " \"B\": np.random.normal(0, 1, 100),\n", + " \"C\": np.random.normal(1, 1, 100),\n", + " \"group\": [\"Z\"] * 100,\n", + " \"binary\": np.random.choice([0, 1], 100, p=[0.5, 0.5]),\n", + " }\n", + " ),\n", + " ]\n", + ")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Difference in means\n", + "\n", + "The only table that calculates anything is the mean difference table. Give it a DataFrame, tell it which variables you're interested in, and it'll give you this." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: Standard errors assume samples are drawn independently.\n", + "Note: Standard errors assume samples are drawn independently.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
Differences in means
Means Differences
XYZOverall MeanX - YX - ZY - Z
N=100N=100N=100N=300
A-0.12-0.1160.058-0.0580.001-0.173-0.174
(0.099)(0.102)(0.092)(0.056)(0.142)(0.135)(0.137)
B0.1085.0080.0591.725-4.900***0.0484.949***
(0.102)(0.207)(0.105)(0.158)(0.231)(0.147)(0.232)
C0.0621.1221.2230.802-1.060***-1.161***-0.101
(0.082)(0.106)(0.091)(0.062)(0.134)(0.123)(0.140)
* p< 0.1, ** p< 0.05, *** p< 0.01
" + ], + "text/plain": [ + "\n", + " Differences in means \n", + "=====================================================================================================================\n", + " Means Differences \n", + " ---------------------------------------------- ---------------------------------------------- \n", + " X Y Z Overall Mean X - Y X - Z Y - Z \n", + " N=100 N=100 N=100 N=300 \n", + "---------------------------------------------------------------------------------------------------------------------\n", + " A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 \n", + " (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) \n", + " B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** \n", + " (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) \n", + " C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 \n", + " (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) \n", + "---------------------------------------------------------------------------------------------------------------------\n", + "* p< 0.1, ** p< 0.05, *** p< 0.01 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table1 = tables.MeanDifferenceTable(\n", + " df=df,\n", + " var_list=[\"A\", \"B\", \"C\"],\n", + " group_var=\"group\",\n", + " diff_pairs=[(\"X\", \"Y\"), (\"X\", \"Z\"), (\"Y\", \"Z\")],\n", + ")\n", + "table1.caption = \"Differences in means\"\n", + "table1.label = \"table:differencesinmeans\"\n", + "table1.table_params[\"caption_location\"] = \"top\"\n", + "\n", + "\n", + "def bold_b(value, **kwargs):\n", + " return {\"value\": f\"{value:.3f}\", \"bold\": True}\n", + "\n", + "\n", + "table1.custom_formatters({(\"A\", \"X\"): lambda x: f\"{x:.2f}\", \"B\": bold_b})\n", + "table1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To render a table in LaTeX, just use the `render_latex` method. It comes with two optional arguments: `outfile` and `only_tabular`. 
If `outfile` is specified, the table will be saved to that file; otherwise a string with the text is returned. If `only_tabular=True`, then the table will only be wrapped in a `tabular` LaTeX environment, not a `table` environment.\n", + "\n", + "See `sample_tex.tex`, `tables.tex`, `tabular_table.tex`, `main.tex`, and `main.pdf` to see the output." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: Standard errors assume samples are drawn independently.\n" + ] + } + ], + "source": [ + "table1.render_latex(outfile=\"tables.tex\", only_tabular=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `SummaryTable` class does exactly what it sounds like---creates a summary table. Under the hood it's just using the `.describe()` method of a Pandas DataFrame; it's there to take advantage of the formatting and rendering options `statstables` has. The cell below shows many of the customization options you have with `statstables`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Summary Table
First Second
aBC
ExampleYesNoYes
Number of Observations300300300
Mean-0.061.7250.802
Std. Dev.0.9762.7451.0709
Min.-2.782-2.535-1.480
-0.709-0.2210.094
-0.0500.8140.736
0.5433.7031.501
Max.2.8210.803.99
NoYesNo
LowestLow ALow BLow C
The default note aligns over here.
But you can move it to the middle!
Or over here!
" + ], + "text/plain": [ + "\n", + " Summary Table \n", + "========================================================\n", + " First Second \n", + " -------- ------------------ \n", + " a B C \n", + " Example Yes No Yes \n", + "--------------------------------------------------------\n", + " Number of Observations 300 300 300 \n", + " Mean -0.06 1.725 0.802 \n", + " Std. Dev. 0.976 2.745 1.0709 \n", + " Min. -2.782 -2.535 -1.480 \n", + " ░ -0.709 -0.221 0.094 \n", + " ▒ -0.050 0.814 0.736 \n", + " ▓ 0.543 3.703 1.501 \n", + " Max. 2.82 10.80 3.99 \n", + " No Yes No \n", + "--------------------------------------------------------\n", + " Lowest Low A Low B Low C \n", + "--------------------------------------------------------\n", + " The default note aligns over here.\n", + " But you can move it to the middle! \n", + "Or over here! " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table2 = tables.SummaryTable(df=df, var_list=[\"A\", \"B\", \"C\"])\n", + "# You can provide a custom formatter for each column or row, or for a\n", + "# specific cell by making the key a tuple with (index name, column name).\n", + "table2.custom_formatters(\n", + " {\n", + " \"count\": lambda x: f\"{x:,.0f}\",\n", + " \"max\": lambda x: f\"{x:,.2f}\",\n", + " (\"mean\", \"A\"): lambda x: f\"{x:,.2f}\",\n", + " (\"std\", \"C\"): lambda x: f\"{x:,.4f}\",\n", + " }\n", + ")\n", + "# rename index and column labels\n", + "table2.rename_index({\"count\": \"Number of Observations\"})\n", + "table2.rename_columns({\"A\": \"a\"})\n", + "# add labels that span multiple columns\n", + "table2.add_multicolumns([\"First\", \"Second\"], [1, 2])\n", + "# add some lines all over the place.\n", + "table2.add_line([\"Yes\", \"No\", \"Yes\"], location=\"after-columns\", label=\"Example\")\n", + "# add a line with no index label\n", + "table2.add_line([\"No\", \"Yes\", \"No\"], location=\"after-body\")\n", + "table2.add_line([\"Low A\", 
\"Low B\", \"Low C\"], location=\"after-footer\", label=\"Lowest\")\n", + "# add notes beneath the table\n", + "table2.add_note(\"The default note aligns over here.\")\n", + "table2.add_note(\"But you can move it to the middle!\", alignment=\"c\")\n", + "table2.add_note(\"Or over here!\", alignment=\"l\")\n", + "table2.caption = \"Summary Table\"\n", + "table2.label = \"table:summarytable\"\n", + "\n", + "table2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table2_tex = table2.render_latex(only_tabular=False)\n", + "with Path(\"tables.tex\").open(\"a\") as f:\n", + " f.write(table2_tex)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Because you'll almost certainly want to make tables specific to your work, there's the `GenericTable` class that you can pass a DataFrame to and get access to all the customization tools available." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + "
Unique Sites10,000
Unique IPs20,000
IPs in EU5,000
IPs in US3,000
IPs outside EU5,000
" + ], + "text/plain": [ + "====================================\n", + " Unique Sites 10,000 \n", + " Unique IPs 20,000 \n", + " IPs in EU 5,000 \n", + " IPs in US 3,000 \n", + " IPs outside EU 5,000 \n", + "------------------------------------" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ip_sites_summary = pd.DataFrame(\n", + " {\n", + " \"a\": [\"Unique Sites\", \"Unique IPs\", \"IPs in EU\", \"IPs in US\", \"IPs outside EU\"],\n", + " \"b\": [10000, 20000, 5000, 3000, 5000],\n", + " }\n", + ")\n", + "ip_sites_table = tables.GenericTable(ip_sites_summary, include_index=False)\n", + "ip_sites_table.table_params[\"show_columns\"] = False\n", + "ip_sites_table.custom_formatters({\"b\": lambda x: f\"{x:,.0f}\"})\n", + "ip_sites_table.column_alignment = \"l\"\n", + "ip_sites_table" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ip_sites_table.render_latex(only_tabular=True, outfile=\"tabular_table.tex\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In addition to LaTeX and HTML, `statstables` can output ASCII tables. This is what you'll get if you print the table or use it in the REPL." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================\n", + " Unique Sites 10,000 \n", + " Unique IPs 20,000 \n", + " IPs in EU 5,000 \n", + " IPs in US 3,000 \n", + " IPs outside EU 5,000 \n", + "------------------------------------\n" + ] + } + ], + "source": [ + "print(ip_sites_table)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: Standard errors assume samples are drawn independently.\n", + "\n", + " Differences in means \n", + "=====================================================================================================================\n", + " Means Differences \n", + " ---------------------------------------------- ---------------------------------------------- \n", + " X Y Z Overall Mean X - Y X - Z Y - Z \n", + " N=100 N=100 N=100 N=300 \n", + "---------------------------------------------------------------------------------------------------------------------\n", + " A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 \n", + " (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) \n", + " B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** \n", + " (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) \n", + " C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 \n", + " (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) \n", + "---------------------------------------------------------------------------------------------------------------------\n", + "* p< 0.1, ** p< 0.05, *** p< 0.01 \n" + ] + } + ], + "source": [ + "print(table1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also change the characters used for each of the lines." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: Standard errors assume samples are drawn independently.\n", + "\n", + " Differences in means \n", + "-----------------------------------------------------------------------------------------------------------------------\n", + "+ Means Differences +\n", + "+ ---------------------------------------------- ---------------------------------------------- +\n", + "+ X Y Z Overall Mean X - Y X - Z Y - Z +\n", + "+ N=100 N=100 N=100 N=300 +\n", + "+=====================================================================================================================+\n", + "+ A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 +\n", + "+ (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) +\n", + "+ B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** +\n", + "+ (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) +\n", + "+ C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 +\n", + "+ (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) +\n", + "-----------------------------------------------------------------------------------------------------------------------\n", + "* p< 0.1, ** p< 0.05, *** p< 0.01 \n" + ] + } + ], + "source": [ + "st.STParams[\"ascii_header_char\"] = \"-\"\n", + "st.STParams[\"ascii_footer_char\"] = \"-\"\n", + "st.STParams[\"ascii_mid_rule_char\"] = \"=\"\n", + "st.STParams[\"ascii_border_char\"] = \"+\"\n", + "\n", + "print(table1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or the amount of space around each cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n", + "+ Unique Sites 10,000 +\n", + "+ Unique IPs 20,000 +\n", + "+ IPs in EU 5,000 +\n", + "+ IPs in US 3,000 +\n", + "+ IPs outside EU 5,000 +\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "st.STParams[\"ascii_padding\"] = 5\n", + "\n", + "print(ip_sites_table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And whether, when you have a column label that spans multiple columns, that label is underlined." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Summary Table
First Second
aBC
ExampleYesNoYes
Number of Observations300300300
Mean-0.061.7250.802
Std. Dev.0.9762.7451.0709
Min.-2.782-2.535-1.480
-0.709-0.2210.094
-0.0500.8140.736
0.5433.7031.501
Max.2.8210.803.99
NoYesNo
LowestLow ALow BLow C
The default note aligns over here.
But you can move it to the middle!
Or over here!
" + ], + "text/plain": [ + "\n", + " Summary Table \n", + "----------------------------------------------------------------------------------\n", + "+ First Second +\n", + "+ -------------- ------------------------------ +\n", + "+ a B C +\n", + "+ Example Yes No Yes +\n", + "+================================================================================+\n", + "+ Number of Observations 300 300 300 +\n", + "+ Mean -0.06 1.725 0.802 +\n", + "+ Std. Dev. 0.976 2.745 1.0709 +\n", + "+ Min. -2.782 -2.535 -1.480 +\n", + "+ ░ -0.709 -0.221 0.094 +\n", + "+ ▒ -0.050 0.814 0.736 +\n", + "+ ▓ 0.543 3.703 1.501 +\n", + "+ Max. 2.82 10.80 3.99 +\n", + "+ No Yes No +\n", + "----------------------------------------------------------------------------------\n", + "+ Lowest Low A Low B Low C +\n", + "----------------------------------------------------------------------------------\n", + " The default note aligns over here.\n", + " But you can move it to the middle! \n", + "Or over here! " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table2" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Summary Table \n", + "----------------------------------------------------------------------------------\n", + "+ First Second +\n", + "+ -------------- ------------------------------ +\n", + "+ a B C +\n", + "+ Example Yes No Yes +\n", + "+================================================================================+\n", + "+ Number of Observations 300 300 300 +\n", + "+ Mean -0.06 1.725 0.802 +\n", + "+ Std. Dev. 0.976 2.745 1.0709 +\n", + "+ Min. -2.782 -2.535 -1.480 +\n", + "+ ░ -0.709 -0.221 0.094 +\n", + "+ ▒ -0.050 0.814 0.736 +\n", + "+ ▓ 0.543 3.703 1.501 +\n", + "+ Max. 
2.82 10.80 3.99 +\n", + "+ No Yes No +\n", + "----------------------------------------------------------------------------------\n", + "+ Lowest Low A Low B Low C +\n", + "----------------------------------------------------------------------------------\n", + " The default note aligns over here.\n", + " But you can move it to the middle! \n", + "Or over here! \n" + ] + } + ], + "source": [ + "print(table2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that these changes will apply to all tables you print.\n", + "\n", + "# Statistical Models\n", + "\n", + "Finally, `statstables` has some support for creating tables from the models in the `statsmodels` and `linearmodels` packages." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import statsmodels.formula.api as smf" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 0.670993\n", + " Iterations 4\n" + ] + } + ], + "source": [ + "mod1 = smf.ols(\"A ~ B + C -1\", data=df).fit()\n", + "mod2 = smf.ols(\"A ~ B + C\", data=df).fit()\n", + "mod3 = smf.probit(\"binary ~ A + B + C\", data=df).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + 
"\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
(1)(2)(3)
Intercept-0.025-0.284***
(0.076)(0.100)
B-0.029-0.0260.070**
(0.019)(0.021)(0.028)
C0.0060.0150.123*
(0.047)(0.054)(0.070)
Observations300300300
R20.0080.005
Pseudo R20.030
F Statistic1.2550.778
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "------------------------------------------------------------------------------------------\n", + "+ (1) (2) (3) +\n", + "+========================================================================================+\n", + "+ Intercept -0.025 -0.284*** +\n", + "+ (0.076) (0.100) +\n", + "+ B -0.029 -0.026 0.070** +\n", + "+ (0.019) (0.021) (0.028) +\n", + "+ C 0.006 0.015 0.123* +\n", + "+ (0.047) (0.054) (0.070) +\n", + "==========================================================================================\n", + "+ Observations 300 300 300 +\n", + "+ R² 0.008 0.005 +\n", + "+ Pseudo R² 0.030 +\n", + "+ F Statistic 1.255 0.778 +\n", + "------------------------------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mod_table = tables.ModelTable(models=[mod1, mod2, mod3])\n", + "mod_table.table_params[\"show_model_numbers\"] = True\n", + "mod_table.parameter_order([\"Intercept\", \"B\", \"C\"])\n", + "mod_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the basic [IV example](https://bashtage.github.io/linearmodels/iv/examples/basic-examples.html) from the LinearModels library:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " 
\n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
OLS 2SLS
First Stage Second Stage
(1)(2)(3)
Intercept-0.18510.237***0.441
(0.185)(0.275)(0.445)
Father Education0.269***
(0.029)
Education0.109***0.059*
(0.014)(0.035)
Observations428428428
R20.1180.1730.093
F Statistic57.196***89.258***2.849*
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "----------------------------------------------------------------------------------------------\n", + "+ OLS 2SLS +\n", + "+ -------------------- ------------------------------------------ +\n", + "+ First Stage Second Stage +\n", + "+ (1) (2) (3) +\n", + "+============================================================================================+\n", + "+ Intercept -0.185 10.237*** 0.441 +\n", + "+ (0.185) (0.275) (0.445) +\n", + "+ Father Education 0.269*** +\n", + "+ (0.029) +\n", + "+ Education 0.109*** 0.059* +\n", + "+ (0.014) (0.035) +\n", + "==============================================================================================\n", + "+ Observations 428 428 428 +\n", + "+ R² 0.118 0.173 0.093 +\n", + "+ F Statistic 57.196*** 89.258*** 2.849* +\n", + "----------------------------------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from linearmodels.datasets import mroz\n", + "from linearmodels.iv import IV2SLS\n", + "from statsmodels.api import add_constant\n", + "\n", + "data = mroz.load()\n", + "data = data.dropna()\n", + "data = add_constant(data, has_constant=\"add\")\n", + "\n", + "res_ols = IV2SLS(np.log(data.wage), data[[\"const\", \"educ\"]], None, None).fit(\n", + " cov_type=\"unadjusted\"\n", + ")\n", + "res_second = IV2SLS(np.log(data.wage), data[[\"const\"]], data.educ, data.fatheduc).fit(\n", + " cov_type=\"unadjusted\"\n", + ")\n", + "\n", + "ivtable = tables.ModelTable(\n", + " models=[res_ols, res_second.first_stage.individual[\"educ\"], res_second]\n", + ")\n", + "ivtable.rename_covariates(\n", + " {\n", + " \"const\": \"Intercept\",\n", + " \"educ\": \"Education\",\n", + " \"fatheduc\": \"Father Education\",\n", + " }\n", + ")\n", + "ivtable.parameter_order([\"const\", \"fatheduc\", \"educ\"])\n", + "ivtable.add_multicolumns([\"OLS\", \"2SLS\"], 
[1, 2])\n", + "ivtable.add_multicolumns([\"\", \"First Stage\", \"Second Stage\"], [1] * 3, underline=False)\n", + "ivtable" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------\n", + "+ OLS 2SLS +\n", + "+ -------------- ------------------------------ +\n", + "+ First Stage Second Stage +\n", + "+ (1) (2) (3) +\n", + "+====================================================================+\n", + "+ Intercept -0.185 10.237*** 0.441 +\n", + "+ (0.185) (0.275) (0.445) +\n", + "+ Father Education 0.269*** +\n", + "+ (0.029) +\n", + "+ Education 0.109*** 0.059* +\n", + "+ (0.014) (0.035) +\n", + "======================================================================\n", + "+ Observations 428 428 428 +\n", + "+ R² 0.118 0.173 0.093 +\n", + "+ F Statistic 57.196*** 89.258*** 2.849* +\n", + "----------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 \n" + ] + } + ], + "source": [ + "st.STParams[\"ascii_padding\"] = 2\n", + "ivtable.index_alignment = \"c\"\n", + "print(ivtable)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "ivtable.render_latex(outfile=\"ivtable.tex\", only_tabular=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the `linearmodels` panel data example" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------------------\n", + "+ Dependent Variable: Log(Wage) +\n", + "+ ---------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+======================================================================+\n", + "+ Intercept 0.092 0.023 1.871*** +\n", + "+ (0.078) 
(0.151) (0.038) +\n", + "+ Experience 0.067*** 0.106*** +\n", + "+ (0.014) (0.015) +\n", + "+ Experience Squared -0.002*** -0.005*** -0.005*** +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182*** 0.106*** 0.080*** +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108*** 0.064*** 0.047** +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139*** -0.139*** +\n", + "+ (0.024) (0.048) +\n", + "========================================================================\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", + "------------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 \n" + ] + } + ], + "source": [ + "from linearmodels.datasets import wage_panel\n", + "from linearmodels.panel import PooledOLS, RandomEffects, PanelOLS\n", + "\n", + "data = wage_panel.load()\n", + "year = pd.Categorical(data.year)\n", + "data = data.set_index([\"nr\", \"year\"])\n", + "data[\"year\"] = year\n", + "\n", + "data = wage_panel.load()\n", + "year = pd.Categorical(data.year)\n", + "data = data.set_index([\"nr\", \"year\"])\n", + "data[\"year\"] = year\n", + "exog_vars = [\"black\", \"hisp\", \"exper\", \"expersq\", \"married\", \"educ\", \"union\", \"year\"]\n", + "exog = add_constant(data[exog_vars])\n", + "pooled_mod = PooledOLS(data.lwage, exog).fit()\n", + "random_mod = RandomEffects(data.lwage, exog).fit()\n", + "exog_vars = [\n", + " \"expersq\",\n", + " \"union\",\n", + " \"married\",\n", + "]\n", + "panel_exog = add_constant(data[exog_vars])\n", + "panel_mod = PanelOLS(\n", + " data.lwage, panel_exog, entity_effects=True, time_effects=True\n", + ").fit()\n", + "panel_table = st.tables.ModelTable([pooled_mod, random_mod, panel_mod])\n", + "panel_table.dependent_variable_name = \"Log(Wage)\"\n", + "panel_table.rename_covariates(\n", + " {\n", + " \"const\": \"Intercept\",\n", + " \"exper\": 
\"Experience\",\n", + " \"expersq\": \"Experience Squared\",\n", + " \"union\": \"Union\",\n", + " \"married\": \"Married\",\n", + " \"black\": \"Black\",\n", + " }\n", + ")\n", + "panel_table.parameter_order([\"const\", \"exper\", \"expersq\", \"union\", \"married\", \"black\"])\n", + "print(panel_table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to add more models that are not currently directly supported by `statstables`, you can create a custom `ModelData` class for that model. See `statstables/modeltables.py` for examples of how to implement that class. Once the class has been created, add it to the `st.SupportedModels` dictionary by doing:\n", + "\n", + "```python\n", + "import statstables as st\n", + "from yourmodelpackage import ModelOutputClass\n", + "\n", + "class CustomModelClass(st.modeltables.ModelData):\n", + " \"\"\"\n", + " Class used to pull the parameters of the model.\n", + " See statstables/modeltables.py for full implementation examples.\n", + " \"\"\"\n", + " ...\n", + "\n", + "st.SupportedModels.add_model(ModelOutputClass, CustomModelClass)\n", + "```\n", + "where `\"ModelOutputClass\"` is the type of object returned after fitting the model.\n", + "\n", + "For example, if `statsmodels` were not supported (for the record, it is) but you wanted to create tables from output from those regression models, you would do:\n", + "```python\n", + "# import the object that is returned when you fit the model\n", + "from statsmodels.regression.linear_model import RegressionResultsWrapper\n", + "from dataclasses import dataclass\n", + "\n", + "@dataclass\n", + "class StatsModelsData(st.modeltables.ModelData):\n", + " \"\"\"\n", + " Create a class for processing the model data.\n", + " \"\"\"\n", + " def __post_init__(self):\n", + " ...\n", + "\n", + " def pull_params(self):\n", + " ...\n", + "\n", + "st.SupportedModels.add_model(RegressionResultsWrapper, StatsModelsData) \n", + "```\n", + "\n", +
"## Formatting\n", + "\n", + "In addition to specifying the number of significant digits and thousands separators, `statstables` allows the user to format the color and font style of cells in LaTex and HTML tables.\n", + "\n", + "Start by creating a table. As of version 0.0.14, you can directly pass parameters into the initializer of each table class." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + 
"\n", + " \n", + " \n", + " \n", + "
Dependent Variable: Log(Wage)
(1)(2)(3)
α0.0920.0231.871***
(0.078)(0.151)(0.038)
Experience0.067***0.106***
(0.014)(0.015)
Experience²-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
Union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
Married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
Black-0.139***-0.139***
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "------------------------------------------------------------------\n", + "+ Dependent Variable: Log(Wage) +\n", + "+ ---------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+================================================================+\n", + "+ α 0.092 0.023 1.871*** +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ Experience 0.067*** 0.106*** +\n", + "+ (0.014) (0.015) +\n", + "+ Experience² -0.002*** -0.005*** -0.005*** +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182*** 0.106*** 0.080*** +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108*** 0.064*** 0.047** +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139*** -0.139*** +\n", + "+ (0.024) (0.048) +\n", + "==================================================================\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", + "------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "covariate_labels = {\n", + " # statstables will convert LaTeX to unicode when rendering HTML and ASCII tables\n", + " \"const\": r\"$\\alpha$\",\n", + " \"exper\": \"Experience\",\n", + " \"expersq\": \"Experience$^2$\",\n", + " \"union\": \"Union\",\n", + " \"married\": \"Married\",\n", + " \"black\": \"Black\",\n", + "}\n", + "covariate_order = [\"const\", \"exper\", \"expersq\", \"union\", \"married\", \"black\"]\n", + "panel_table = st.tables.ModelTable(\n", + " [pooled_mod, random_mod, panel_mod],\n", + " covariate_labels=covariate_labels,\n", + " covariate_order=covariate_order,\n", + " dependent_variable_name=\"Log(Wage)\",\n", + ")\n", + "panel_table" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "------------------------------------------------------------------\n", + "+ Dependent Variable: Log(Wage) +\n", + "+ ---------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+================================================================+\n", + "+ α 0.092 0.023 1.871*** +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ Experience 0.067*** 0.106*** +\n", + "+ (0.014) (0.015) +\n", + "+ Experience² -0.002*** -0.005*** -0.005*** +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182*** 0.106*** 0.080*** +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108*** 0.064*** 0.047** +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139*** -0.139*** +\n", + "+ (0.024) (0.048) +\n", + "==================================================================\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", + "------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 \n" + ] + } + ], + "source": [ + "print(panel_table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use customized formatting, create a function that will return a dictionary indicating whether the text should be bold, italicized, or made a different color." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
Dependent Variable: Log(Wage)
(1)(2)(3)
α0.0920.0231.871***
(0.078)(0.151)(0.038)
Experience 0.067*** 0.106***
(0.014)(0.015)
Experience²-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
Union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
Married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
Black-0.139***-0.139***
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "------------------------------------------------------------------\n", + "+ Dependent Variable: Log(Wage) +\n", + "+ ---------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+================================================================+\n", + "+ α 0.092 0.023 1.871*** +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ Experience 0.067*** 0.106*** +\n", + "+ (0.014) (0.015) +\n", + "+ Experience² -0.002*** -0.005*** -0.005*** +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182*** 0.106*** 0.080*** +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108*** 0.064*** 0.047** +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139*** -0.139*** +\n", + "+ (0.024) (0.048) +\n", + "==================================================================\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", + "------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def parameter_formatter(value: int | float | str, **kwargs) -> dict | str:\n", + " if isinstance(value, str):\n", + " return value\n", + " color = \"red\"\n", + " if value > 0:\n", + " color = \"green\"\n", + " if isinstance(value, str):\n", + " val = value\n", + " else:\n", + " val = f\"{value: 0.3f}\"\n", + " return {\"bold\": True, \"value\": val, \"italic\": True, \"color\": color}\n", + "\n", + "\n", + "panel_table.custom_formatters(\n", + " {\"exper\": parameter_formatter, \"expersq\": parameter_formatter}\n", + ")\n", + "panel_table.render_latex(outfile=\"wage_table.tex\")\n", + "panel_table" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "panel_table.render_html(outfile=\"panel_table.html\")" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "If you want to apply the formatter to every row and column, you can change the default formatter to a function, as shown here. Instances of the `ModelTable` and `MeanDifferencesTable` classes will pass a few keyword arguments into the formatting function so you should either include them as optional arguments or include `**kwargs` in your function.\n", + "\n", + "`ModelTable` passes the following arguments:\n", + "* `p_value`: float = the p-value for the parameter estimate\n", + "* `se`: float = the standard error of the parameter estimate\n", + "* `ci`: tuple(float, float) = a tuple containing the confidence interval of the parameter estimate\n", + "\n", + "`MeanDifferencesTable` passes:\n", + "* `p_value`: float = the p-value for the difference in means\n", + "* `se`: float = the standard error of the mean" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "def new_formatter(value, p_value=None, **kwargs):\n", + " \"\"\"\n", + " A custom formatter that will make all of the statistically significant parameters\n", + " bold and read in the table.\n", + " \"\"\"\n", + " if isinstance(value, str):\n", + " return value\n", + " bold = False\n", + " color = None\n", + " if isinstance(p_value, float):\n", + " if p_value <= 0.1:\n", + " bold = True\n", + " color = \"red\"\n", + " val = f\"{value:0.3f}\"\n", + " return {\"value\": val, \"bold\": bold, \"color\": color}\n", + "\n", + "\n", + "# clear all of the labels, formatters, parameter order, and other custom features\n", + "panel_table.reset_custom_features()\n", + "panel_table.default_formatter = new_formatter" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + 
" \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + 
"\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
(1)(2)(3)
black-0.139***-0.139***
(0.024)(0.048)
const0.0920.0231.871***
(0.078)(0.151)(0.038)
educ0.091***0.092***
(0.005)(0.011)
exper0.067***0.106***
(0.014)(0.015)
expersq-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
hisp0.0160.022
(0.021)(0.043)
married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
year.19810.058*0.040
(0.030)(0.025)
year.19820.063*0.031
(0.033)(0.032)
year.19830.062*0.020
(0.037)(0.042)
year.19840.090**0.043
(0.040)(0.052)
year.19850.109**0.058
(0.043)(0.061)
year.19860.142***0.092
(0.046)(0.072)
year.19870.174***0.135*
(0.049)(0.082)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "------------------------------------------------------------------\n", + "+ (1) (2) (3) +\n", + "+================================================================+\n", + "+ black -0.139*** -0.139*** +\n", + "+ (0.024) (0.048) +\n", + "+ const 0.092 0.023 1.871*** +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ educ 0.091*** 0.092*** +\n", + "+ (0.005) (0.011) +\n", + "+ exper 0.067*** 0.106*** +\n", + "+ (0.014) (0.015) +\n", + "+ expersq -0.002*** -0.005*** -0.005*** +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ hisp 0.016 0.022 +\n", + "+ (0.021) (0.043) +\n", + "+ married 0.108*** 0.064*** 0.047** +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ union 0.182*** 0.106*** 0.080*** +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ year.1981 0.058* 0.040 +\n", + "+ (0.030) (0.025) +\n", + "+ year.1982 0.063* 0.031 +\n", + "+ (0.033) (0.032) +\n", + "+ year.1983 0.062* 0.020 +\n", + "+ (0.037) (0.042) +\n", + "+ year.1984 0.090** 0.043 +\n", + "+ (0.040) (0.052) +\n", + "+ year.1985 0.109** 0.058 +\n", + "+ (0.043) (0.061) +\n", + "+ year.1986 0.142*** 0.092 +\n", + "+ (0.046) (0.072) +\n", + "+ year.1987 0.174*** 0.135* +\n", + "+ (0.049) (0.082) +\n", + "==================================================================\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. 
Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", + "------------------------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "panel_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`statstables` can also make `longtables` in LaTex" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "fake = Faker()\n", + "Faker.seed(512)\n", + "np.random.seed(410)\n", + "names = [fake.name() for _ in range(100)]\n", + "x1 = np.random.randint(500, 10000, 100)\n", + "x2 = np.random.uniform(size=100)\n", + "longdata = pd.DataFrame({\"Names\": names, \"X1\": x1, \"X2\": x2})\n", + "longtable = st.tables.GenericTable(longdata, longtable=True, include_index=False)\n", + "longtable.render_latex(\"longtable.tex\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also make panel a panel table by combining two tables objects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Panel A) Men \n", + "========================================\n", + "+ ID School +\n", + "+--------------------------------------+\n", + "+ Matthew Ortiz 1234 Texas +\n", + "+ Michael Costa 6789 UVA +\n", + "+ Samuel Johnson 1023 UMBC +\n", + "+ Dakota Snyder 5810 UGA +\n", + "+ Scott Mills 9182 Rice +\n", + "----------------------------------------\n", + "Panel B) Women \n", + "========================================================\n", + "+ ID School +\n", + "+------------------------------------------------------+\n", + "+ Erin Anderson 9183 Wake Forrest +\n", + "+ Michelle Zimmerman 5734 Emory +\n", + "+ Danielle King 1290 Texas +\n", + "+ Shannon Nelson 4743 UVA +\n", + "+ Stephanie Booth 8912 Columbia +\n", + "--------------------------------------------------------\n", + "\n" + ] + } + ], + "source": [ + "st.STParams[\"ascii_header_char\"] = \"=\"\n", + "# st.STParams[\"double_top_rule\"] = True\n", + "st.STParams[\"ascii_mid_rule_char\"] = \"-\"\n", + "fake = Faker()\n", + "Faker.seed(202)\n", + "panela_df = pd.DataFrame(\n", + " {\n", + " \"ID\": [1234, 6789, 1023, 5810, 9182],\n", + " \"School\": [\"Texas\", \"UVA\", \"UMBC\", \"UGA\", \"Rice\"],\n", + " },\n", + " index=[fake.name_male() for _ in range(5)],\n", + ")\n", + "panela = tables.GenericTable(\n", + " panela_df,\n", + " formatters={\"ID\": lambda x: f\"{x}\"},\n", + ")\n", + "panelb_df = pd.DataFrame(\n", + " {\n", + " \"ID\": [9183, 5734, 1290, 4743, 8912],\n", + " \"School\": [\"Wake Forrest\", \"Emory\", \"Texas\", \"UVA\", \"Columbia\"],\n", + " },\n", + " index=[fake.name_female() for _ in range(5)],\n", + ")\n", + "panelb = tables.GenericTable(panelb_df, formatters={\"ID\": lambda x: f\"{x}\"})\n", + "panel = tables.PanelTable([panela, panelb], [\"Men\", \"Women\"])\n", + "print(panel)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "panel.render_latex(\"panel.tex\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the name you use for the dependent variable is particularly long, the resulting LaTex table might look a bit off with the last column being noticeably wider than the others. You can fix this by using the `tabularx` environment instead of `tabular`. You may also want to use this if you'd just like a wider table." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + 
"\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + "
Dependent Variable: A Long Title That Would Look Odd
(1)(2)(3)
α0.0920.0231.871
(0.078)(0.151)(0.038)
Experience0.0670.106
(0.014)(0.015)
Experience²-0.002-0.005-0.005
(0.001)(0.001)(0.001)
Union0.1820.1060.080
(0.017)(0.018)(0.019)
Married0.1080.0640.047
(0.016)(0.017)(0.018)
Black-0.139-0.139
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.45968.40927.959
" + ], + "text/plain": [ + "==================================================================\n", + "+ Dependent Variable: A Long Title That Would Look Odd +\n", + "+ -------------------------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+----------------------------------------------------------------+\n", + "+ α 0.092 0.023 1.871 +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ Experience 0.067 0.106 +\n", + "+ (0.014) (0.015) +\n", + "+ Experience² -0.002 -0.005 -0.005 +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182 0.106 0.080 +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108 0.064 0.047 +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139 -0.139 +\n", + "+ (0.024) (0.048) +\n", + "------------------------------------------------------------------\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459 68.409 27.959 +\n", + "------------------------------------------------------------------" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "panel_table_long_name = st.tables.ModelTable(\n", + " [pooled_mod, random_mod, panel_mod],\n", + " covariate_labels=covariate_labels,\n", + " covariate_order=covariate_order,\n", + " dependent_variable_name=\"A Long Title That Would Look Odd\",\n", + " dependent_var_cover_index=True,\n", + " dependent_var_alignment=\"r\",\n", + " show_stars=False,\n", + " show_significance_levels=False,\n", + " use_tabularx=True,\n", + ")\n", + "panel_table_long_name.render_latex(outfile=\"wage_table_long_name.tex\")\n", + "panel_table_long_name" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==================================================================\n", + "+ Dependent Variable: A Long Title That Would Look Odd +\n", + "+ 
-------------------------------------------------------------- +\n", + "+ (1) (2) (3) +\n", + "+----------------------------------------------------------------+\n", + "+ α 0.092 0.023 1.871 +\n", + "+ (0.078) (0.151) (0.038) +\n", + "+ Experience 0.067 0.106 +\n", + "+ (0.014) (0.015) +\n", + "+ Experience² -0.002 -0.005 -0.005 +\n", + "+ (0.001) (0.001) (0.001) +\n", + "+ Union 0.182 0.106 0.080 +\n", + "+ (0.017) (0.018) (0.019) +\n", + "+ Married 0.108 0.064 0.047 +\n", + "+ (0.016) (0.017) (0.018) +\n", + "+ Black -0.139 -0.139 +\n", + "+ (0.024) (0.048) +\n", + "------------------------------------------------------------------\n", + "+ Observations 4,360 4,360 4,360 +\n", + "+ N. Groups 545 545 545 +\n", + "+ R² 0.189 0.181 0.022 +\n", + "+ F Statistic 72.459 68.409 27.959 +\n", + "------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(panel_table_long_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`statstables` also offers native support for the `pyfixest` package." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " \n", + " \n", + "
Dependent Variable: Y
(1)(2)
X1-0.919***-0.007
(0.066)(0.035)
X2-0.015
(0.010)
Observations997997
R20.609
*p<0.1, **p<0.05, ***p<0.01
" + ], + "text/plain": [ + "==================================================\n", + "+ Dependent Variable: Y +\n", + "+ ------------------------------ +\n", + "+ (1) (2) +\n", + "+------------------------------------------------+\n", + "+ X1 -0.919*** -0.007 +\n", + "+ (0.066) (0.035) +\n", + "+ X2 -0.015 +\n", + "+ (0.010) +\n", + "--------------------------------------------------\n", + "+ Observations 997 997 +\n", + "+ R² 0.609 +\n", + "--------------------------------------------------\n", + "*p<0.1, **p<0.05, ***p<0.01 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyfixest as pf\n", + "\n", + "data = pf.get_data()\n", + "feols = pf.feols(\"Y ~ X1 | f1 + f2\", data=data)\n", + "poisson_data = pf.get_data(model=\"Fepois\")\n", + "fepois = pf.fepois(\"Y ~ X1 + X2 | f1 + f2\", data=poisson_data)\n", + "pyfixest_table = tables.ModelTable([feols, fepois])\n", + "pyfixest_table.render_latex(outfile=\"pyfixest_tables.tex\", only_tabular=True)\n", + "pyfixest_table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } ], - "ref": "711d4f82-6023-4b06-90d2-b034c224ad1e", - "rows": [ - [ - "0", - "-1.326374839207587", - "1.5517080238172218", - "-0.6779442115297923", - "X", - "0" - ], - [ - "1", - "-0.4855014911590015", - "-1.6752278648482863", - "0.7029360254882143", - "X", - "0" - ], - [ - "2", - "0.15854401045608701", - "-2.0374028524818484", - "-0.6588901384541379", - "X", - "0" - ], - [ - "3", - "0.6144153695899064", - "0.5720661339871327", - "-0.41524628910099726", - "X", - "1" - ], - [ - "4", - "-0.22945697966169146", - "-0.5443314119709048", - "0.9058529965429116", - "X", - "0" - ], - [ - "5", - "1.0218027157053844", - "2.319224741274596", - "-1.4633882228990143", - "X", - "1" - ], - [ - "6", - "-1.6534774140963884", - "1.0565307111567663", - "0.09874353742518119", - "X", - "1" - ], - [ - "7", - "-1.2851359202811061", - 
"0.36034746199076184", - "-0.6071031092237691", - "X", - "0" - ], - [ - "8", - "1.00178310570641", - "0.18487795842319749", - "-0.9619610448171395", - "X", - "0" - ], - [ - "9", - "2.138406992327448", - "0.8158131680238252", - "-1.121991974191712", - "X", - "1" - ], - [ - "10", - "-0.041020995857143104", - "-0.345976858342248", - "-0.06781507695173056", - "X", - "0" - ], - [ - "11", - "-1.0191792128268418", - "-1.3623899746524384", - "-0.4018779636000747", - "X", - "0" - ], - [ - "12", - "-1.5780440132393516", - "-1.4758170637977432", - "-0.9204884657850502", - "X", - "0" - ], - [ - "13", - "-2.7821184227141362", - "1.1661215555211222", - "-0.22903931136584582", - "X", - "0" - ], - [ - "14", - "0.612444485087259", - "-1.9522004724356903", - "1.207819918929549", - "X", - "0" - ], - [ - "15", - "-1.12542077250989", - "-0.4865318442699179", - "-0.5079821633214079", - "X", - "0" - ], - [ - "16", - "1.3881469695585034", - "-0.6743886022897816", - "-0.49441724565810297", - "X", - "0" - ], - [ - "17", - "0.3846769833383076", - "0.5494485914990322", - "-0.874435697972917", - "X", - "0" - ], - [ - "18", - "-0.6382852947258157", - "0.960849394573923", - "-0.8266610963220111", - "X", - "1" - ], - [ - "19", - "0.09891398676439228", - "0.33274285719365226", - "-0.7282369703709443", - "X", - "0" - ], - [ - "20", - "-0.21220271424390932", - "0.30085882729636454", - "-1.410648226501225", - "X", - "0" - ], - [ - "21", - "-0.08278421444838444", - "0.8531716602166252", - "0.22445249026168654", - "X", - "0" - ], - [ - "22", - "0.5309432936590944", - "-1.0960913094021334", - "-0.1891808692956873", - "X", - "0" - ], - [ - "23", - "-1.2293589424460318", - "0.2812838498470437", - "-1.4802271854765883", - "X", - "0" - ], - [ - "24", - "-0.188486153185734", - "1.1343977926097972", - "-0.749516964627805", - "X", - "0" - ], - [ - "25", - "1.3106347359496997", - "0.8138481160482607", - "-0.7200817709168441", - "X", - "1" - ], - [ - "26", - "0.34204700942623045", - "2.9508871835399986", - 
"0.41977147475509236", - "X", - "0" - ], - [ - "27", - "0.8741597082079612", - "-1.0773216841925883", - "-0.995988015219408", - "X", - "1" - ], - [ - "28", - "1.9076741268422726", - "0.8726540928378016", - "0.5599070378193184", - "X", - "1" - ], - [ - "29", - "0.4280244542935084", - "1.1332058243521241", - "0.6861015391597531", - "X", - "0" - ], - [ - "30", - "-0.07074245324882782", - "0.9073428058637397", - "0.23465627018758284", - "X", - "0" - ], - [ - "31", - "-0.2005748640289552", - "-0.38594841356108306", - "0.3592344723810465", - "X", - "0" - ], - [ - "32", - "-0.7622620547215659", - "-0.17348227732753893", - "0.8731956414013242", - "X", - "0" - ], - [ - "33", - "-0.05289315952085553", - "0.5673926824088147", - "-0.9892282636172631", - "X", - "0" - ], - [ - "34", - "-0.6626650103175459", - "-0.8570943948110474", - "-0.02233670618972126", - "X", - "1" - ], - [ - "35", - "-1.1474933334641009", - "0.505438661935886", - "-0.5433418885255469", - "X", - "0" - ], - [ - "36", - "-0.5841443544432646", - "0.8618696026574305", - "-0.6850261091048194", - "X", - "0" - ], - [ - "37", - "-0.6093540570131564", - "0.26240798010125793", - "-0.19425845748858184", - "X", - "0" - ], - [ - "38", - "0.3634884000557077", - "0.345066728509237", - "0.6262002701728635", - "X", - "0" - ], - [ - "39", - "-0.9226668426854373", - "1.5831474835534", - "0.7478804194993391", - "X", - "0" - ], - [ - "40", - "0.42231480313163766", - "-0.06740675699875796", - "-0.3524079913423599", - "X", - "0" - ], - [ - "41", - "-0.004549436134780775", - "-0.09569745449085118", - "0.268451733909938", - "X", - "0" - ], - [ - "42", - "0.8915648160884971", - "-0.1473205948055779", - "0.33760125161512755", - "X", - "1" - ], - [ - "43", - "2.816976093400371", - "-1.258785583396147", - "0.33164346064316313", - "X", - "0" - ], - [ - "44", - "-0.5581139854121363", - "1.0664144307134953", - "0.8943885471344221", - "X", - "0" - ], - [ - "45", - "-0.025765332962440927", - "-1.0539201156859819", - "0.48726199769554035", - 
"X", - "0" - ], - [ - "46", - "0.04648735293223192", - "1.0964111222283048", - "2.7220365467358634", - "X", - "0" - ], - [ - "47", - "0.5252086141014584", - "1.307864542164725", - "0.8367160166732635", - "X", - "1" - ], - [ - "48", - "0.20646376321873006", - "-0.3997440672940264", - "1.2618602613123642", - "X", - "0" - ], - [ - "49", - "-1.6161032559576074", - "1.8243597442427602", - "1.6204498314617477", - "X", - "0" - ] - ], - "shape": { - "columns": 5, - "rows": 300 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCgroupbinary
0-1.3263751.551708-0.677944X0
1-0.485501-1.6752280.702936X0
20.158544-2.037403-0.658890X0
30.6144150.572066-0.415246X1
4-0.229457-0.5443310.905853X0
..................
950.562179-0.9677571.226888Z1
961.137781-0.7035741.129964Z1
97-0.9375131.7473370.907810Z1
980.7031331.2066231.129779Z0
991.042202-0.056653-0.047265Z1
\n", - "

300 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " A B C group binary\n", - "0 -1.326375 1.551708 -0.677944 X 0\n", - "1 -0.485501 -1.675228 0.702936 X 0\n", - "2 0.158544 -2.037403 -0.658890 X 0\n", - "3 0.614415 0.572066 -0.415246 X 1\n", - "4 -0.229457 -0.544331 0.905853 X 0\n", - ".. ... ... ... ... ...\n", - "95 0.562179 -0.967757 1.226888 Z 1\n", - "96 1.137781 -0.703574 1.129964 Z 1\n", - "97 -0.937513 1.747337 0.907810 Z 1\n", - "98 0.703133 1.206623 1.129779 Z 0\n", - "99 1.042202 -0.056653 -0.047265 Z 1\n", - "\n", - "[300 rows x 5 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.concat(\n", - " [\n", - " pd.DataFrame(\n", - " {\n", - " \"A\": np.random.normal(0, 1, 100),\n", - " \"B\": np.random.normal(0, 1, 100),\n", - " \"C\": np.random.normal(0, 1, 100),\n", - " \"group\": [\"X\"] * 100,\n", - " \"binary\": np.random.choice([0, 1], 100, p=[0.7, 0.3]),\n", - " }\n", - " ),\n", - " pd.DataFrame(\n", - " {\n", - " \"A\": np.random.normal(0, 1, 100),\n", - " \"B\": np.random.normal(5, 2, 100),\n", - " \"C\": np.random.normal(1, 1, 100),\n", - " \"group\": [\"Y\"] * 100,\n", - " \"binary\": np.random.choice([0, 1], 100, p=[0.3, 0.7]),\n", - " }\n", - " ),\n", - " pd.DataFrame(\n", - " {\n", - " \"A\": np.random.normal(0, 1, 100),\n", - " \"B\": np.random.normal(0, 1, 100),\n", - " \"C\": np.random.normal(1, 1, 100),\n", - " \"group\": [\"Z\"] * 100,\n", - " \"binary\": np.random.choice([0, 1], 100, p=[0.5, 0.5]),\n", - " }\n", - " ),\n", - " ]\n", - ")\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Difference in means\n", - "\n", - "The only table that calculates anything is the mean difference table. Give it a DataFrame, tell it which variables you're interested in, and it'll give you this." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: Standard errors assume samples are drawn independently.\n", - "Note: Standard errors assume samples are drawn independently.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
Differences in means
Means Differences
XYZOverall MeanX - YX - ZY - Z
N=100N=100N=100N=300
A-0.12-0.1160.058-0.0580.001-0.173-0.174
(0.099)(0.102)(0.092)(0.056)(0.142)(0.135)(0.137)
B0.1085.0080.0591.725-4.900***0.0484.949***
(0.102)(0.207)(0.105)(0.158)(0.231)(0.147)(0.232)
C0.0621.1221.2230.802-1.060***-1.161***-0.101
(0.082)(0.106)(0.091)(0.062)(0.134)(0.123)(0.140)
* p< 0.1, ** p< 0.05, *** p< 0.01
" - ], - "text/plain": [ - "\n", - " Differences in means \n", - "=====================================================================================================================\n", - " Means Differences \n", - " ---------------------------------------------- ---------------------------------------------- \n", - " X Y Z Overall Mean X - Y X - Z Y - Z \n", - " N=100 N=100 N=100 N=300 \n", - "---------------------------------------------------------------------------------------------------------------------\n", - " A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 \n", - " (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) \n", - " B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** \n", - " (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) \n", - " C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 \n", - " (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) \n", - "---------------------------------------------------------------------------------------------------------------------\n", - "* p< 0.1, ** p< 0.05, *** p< 0.01 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "table1 = tables.MeanDifferenceTable(\n", - " df=df,\n", - " var_list=[\"A\", \"B\", \"C\"],\n", - " group_var=\"group\",\n", - " diff_pairs=[(\"X\", \"Y\"), (\"X\", \"Z\"), (\"Y\", \"Z\")],\n", - ")\n", - "table1.caption = \"Differences in means\"\n", - "table1.label = \"table:differencesinmeans\"\n", - "table1.table_params[\"caption_location\"] = \"top\"\n", - "\n", - "\n", - "def bold_b(value, **kwargs):\n", - " return {\"value\": f\"{value:.3f}\", \"bold\": True}\n", - "\n", - "\n", - "table1.custom_formatters({(\"A\", \"X\"): lambda x: f\"{x:.2f}\", \"B\": bold_b})\n", - "table1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To render a table in LaTeX, just use the `render_latex` method. It comes with two optional arguments: `outfile` and `only_tabular`. 
If `outfile` is specified, the table will be saved to that file, otherwise a string with the text is returned. If `only_tabular=True`, then the table will only be wrapped in a `tabular` LaTeX environment, not a `table` environment.\n", - "\n", - "See `sample_tex.tex`, `tables.tex`, `tabular_table.tex`, `main.tex`, and `main.pdf` to see the output." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: Standard errors assume samples are drawn independently.\n" - ] - } - ], - "source": [ - "table1.render_latex(outfile=\"tables.tex\", only_tabular=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `SummaryTable` class does exactly what it sounds like---creates a summary table. Under the hood it's just using the `.describe()` method of a Pandas DataFrame, it's there to take advantage of the formatting and rendering options `statstables` has. The cell below shows many of the customization options you have with `statstables`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Summary Table
First Second
aBC
ExampleYesNoYes
Number of Observations300300300
Mean-0.061.7250.802
Std. Dev.0.9762.7451.0709
Min.-2.782-2.535-1.480
-0.709-0.2210.094
-0.0500.8140.736
0.5433.7031.501
Max.2.8210.803.99
NoYesNo
LowestLow ALow BLow C
The default note aligns over here.
But you can move it to the middle!
Or over here!
" - ], - "text/plain": [ - "\n", - " Summary Table \n", - "========================================================\n", - " First Second \n", - " -------- ------------------ \n", - " a B C \n", - " Example Yes No Yes \n", - "--------------------------------------------------------\n", - " Number of Observations 300 300 300 \n", - " Mean -0.06 1.725 0.802 \n", - " Std. Dev. 0.976 2.745 1.0709 \n", - " Min. -2.782 -2.535 -1.480 \n", - " ░ -0.709 -0.221 0.094 \n", - " ▒ -0.050 0.814 0.736 \n", - " ▓ 0.543 3.703 1.501 \n", - " Max. 2.82 10.80 3.99 \n", - " No Yes No \n", - "--------------------------------------------------------\n", - " Lowest Low A Low B Low C \n", - "--------------------------------------------------------\n", - " The default note aligns over here.\n", - " But you can move it to the middle! \n", - "Or over here! " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "table2 = tables.SummaryTable(df=df, var_list=[\"A\", \"B\", \"C\"])\n", - "# You can provide a custom formatter for each column, row, or, by specifying a\n", - "# specific cell by maknig the key a tuple with (index name, column name).\n", - "table2.custom_formatters(\n", - " {\n", - " \"count\": lambda x: f\"{x:,.0f}\",\n", - " \"max\": lambda x: f\"{x:,.2f}\",\n", - " (\"mean\", \"A\"): lambda x: f\"{x:,.2f}\",\n", - " (\"std\", \"C\"): lambda x: f\"{x:,.4f}\",\n", - " }\n", - ")\n", - "# rename index and column labels\n", - "table2.rename_index({\"count\": \"Number of Observations\"})\n", - "table2.rename_columns({\"A\": \"a\"})\n", - "# add labels that span multiple columns\n", - "table2.add_multicolumns([\"First\", \"Second\"], [1, 2])\n", - "# add some lines all over the place.\n", - "table2.add_line([\"Yes\", \"No\", \"Yes\"], location=\"after-columns\", label=\"Example\")\n", - "# add a line with no index label\n", - "table2.add_line([\"No\", \"Yes\", \"No\"], location=\"after-body\")\n", - "table2.add_line([\"Low A\", 
\"Low B\", \"Low C\"], location=\"after-footer\", label=\"Lowest\")\n", - "# add notes beneath the table\n", - "table2.add_note(\"The default note aligns over here.\")\n", - "table2.add_note(\"But you can move it to the middle!\", alignment=\"c\")\n", - "table2.add_note(\"Or over here!\", alignment=\"l\")\n", - "table2.caption = \"Summary Table\"\n", - "table2.label = \"table:summarytable\"\n", - "\n", - "table2" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "table2_tex = table2.render_latex(only_tabular=False)\n", - "with Path(\"tables.tex\").open(\"a\") as f:\n", - " f.write(table2_tex)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Because you'll almost certainly want to make tables specific to your work, there's the `GenericTable` class that you can pass a DataFrame to and get access to all the customization tools available." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - "
Unique Sites10,000
Unique IPs20,000
IPs in EU5,000
IPs in US3,000
IPs outside EU5,000
" - ], - "text/plain": [ - "====================================\n", - " Unique Sites 10,000 \n", - " Unique IPs 20,000 \n", - " IPs in EU 5,000 \n", - " IPs in US 3,000 \n", - " IPs outside EU 5,000 \n", - "------------------------------------" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ip_sites_summary = pd.DataFrame(\n", - " {\n", - " \"a\": [\"Unique Sites\", \"Unique IPs\", \"IPs in EU\", \"IPs in US\", \"IPs outside EU\"],\n", - " \"b\": [10000, 20000, 5000, 3000, 5000],\n", - " }\n", - ")\n", - "ip_sites_table = tables.GenericTable(ip_sites_summary, include_index=False)\n", - "ip_sites_table.table_params[\"show_columns\"] = False\n", - "ip_sites_table.custom_formatters({\"b\": lambda x: f\"{x:,.0f}\"})\n", - "ip_sites_table.column_alignment = \"l\"\n", - "ip_sites_table" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ip_sites_table.render_latex(only_tabular=True, outfile=\"tabular_table.tex\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In addition to LateX and HTML, `statstables` can output ASCII tables. This is what you'll get if you print the table or use it in the repl." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "====================================\n", - " Unique Sites 10,000 \n", - " Unique IPs 20,000 \n", - " IPs in EU 5,000 \n", - " IPs in US 3,000 \n", - " IPs outside EU 5,000 \n", - "------------------------------------\n" - ] - } - ], - "source": [ - "print(ip_sites_table)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: Standard errors assume samples are drawn independently.\n", - "\n", - " Differences in means \n", - "=====================================================================================================================\n", - " Means Differences \n", - " ---------------------------------------------- ---------------------------------------------- \n", - " X Y Z Overall Mean X - Y X - Z Y - Z \n", - " N=100 N=100 N=100 N=300 \n", - "---------------------------------------------------------------------------------------------------------------------\n", - " A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 \n", - " (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) \n", - " B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** \n", - " (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) \n", - " C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 \n", - " (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) \n", - "---------------------------------------------------------------------------------------------------------------------\n", - "* p< 0.1, ** p< 0.05, *** p< 0.01 \n" - ] - } - ], - "source": [ - "print(table1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also change the characters used for each of the lines." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: Standard errors assume samples are drawn independently.\n", - "\n", - " Differences in means \n", - "-----------------------------------------------------------------------------------------------------------------------\n", - "+ Means Differences +\n", - "+ ---------------------------------------------- ---------------------------------------------- +\n", - "+ X Y Z Overall Mean X - Y X - Z Y - Z +\n", - "+ N=100 N=100 N=100 N=300 +\n", - "+=====================================================================================================================+\n", - "+ A -0.12 -0.116 0.058 -0.058 0.001 -0.173 -0.174 +\n", - "+ (0.099) (0.102) (0.092) (0.056) (0.142) (0.135) (0.137) +\n", - "+ B 0.108 5.008 0.059 1.725 -4.900*** 0.048 4.949*** +\n", - "+ (0.102) (0.207) (0.105) (0.158) (0.231) (0.147) (0.232) +\n", - "+ C 0.062 1.122 1.223 0.802 -1.060*** -1.161*** -0.101 +\n", - "+ (0.082) (0.106) (0.091) (0.062) (0.134) (0.123) (0.140) +\n", - "-----------------------------------------------------------------------------------------------------------------------\n", - "* p< 0.1, ** p< 0.05, *** p< 0.01 \n" - ] - } - ], - "source": [ - "st.STParams[\"ascii_header_char\"] = \"-\"\n", - "st.STParams[\"ascii_footer_char\"] = \"-\"\n", - "st.STParams[\"ascii_mid_rule_char\"] = \"=\"\n", - "st.STParams[\"ascii_border_char\"] = \"+\"\n", - "\n", - "print(table1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or the amount of space around each cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------\n", - "+ Unique Sites 10,000 +\n", - "+ Unique IPs 20,000 +\n", - "+ IPs in EU 5,000 +\n", - "+ IPs in US 3,000 +\n", - "+ IPs outside EU 5,000 +\n", - "--------------------------------------------------\n" - ] - } - ], - "source": [ - "st.STParams[\"ascii_padding\"] = 5\n", - "\n", - "print(ip_sites_table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And whether, when you have a column label that spans multiple columns, that label is underlined." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Summary Table
First Second
aBC
ExampleYesNoYes
Number of Observations300300300
Mean-0.061.7250.802
Std. Dev.0.9762.7451.0709
Min.-2.782-2.535-1.480
-0.709-0.2210.094
-0.0500.8140.736
0.5433.7031.501
Max.2.8210.803.99
NoYesNo
LowestLow ALow BLow C
The default note aligns over here.
But you can move it to the middle!
Or over here!
" - ], - "text/plain": [ - "\n", - " Summary Table \n", - "----------------------------------------------------------------------------------\n", - "+ First Second +\n", - "+ -------------- ------------------------------ +\n", - "+ a B C +\n", - "+ Example Yes No Yes +\n", - "+================================================================================+\n", - "+ Number of Observations 300 300 300 +\n", - "+ Mean -0.06 1.725 0.802 +\n", - "+ Std. Dev. 0.976 2.745 1.0709 +\n", - "+ Min. -2.782 -2.535 -1.480 +\n", - "+ ░ -0.709 -0.221 0.094 +\n", - "+ ▒ -0.050 0.814 0.736 +\n", - "+ ▓ 0.543 3.703 1.501 +\n", - "+ Max. 2.82 10.80 3.99 +\n", - "+ No Yes No +\n", - "----------------------------------------------------------------------------------\n", - "+ Lowest Low A Low B Low C +\n", - "----------------------------------------------------------------------------------\n", - " The default note aligns over here.\n", - " But you can move it to the middle! \n", - "Or over here! " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "table2" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " Summary Table \n", - "----------------------------------------------------------------------------------\n", - "+ First Second +\n", - "+ -------------- ------------------------------ +\n", - "+ a B C +\n", - "+ Example Yes No Yes +\n", - "+================================================================================+\n", - "+ Number of Observations 300 300 300 +\n", - "+ Mean -0.06 1.725 0.802 +\n", - "+ Std. Dev. 0.976 2.745 1.0709 +\n", - "+ Min. -2.782 -2.535 -1.480 +\n", - "+ ░ -0.709 -0.221 0.094 +\n", - "+ ▒ -0.050 0.814 0.736 +\n", - "+ ▓ 0.543 3.703 1.501 +\n", - "+ Max. 
2.82 10.80 3.99 +\n", - "+ No Yes No +\n", - "----------------------------------------------------------------------------------\n", - "+ Lowest Low A Low B Low C +\n", - "----------------------------------------------------------------------------------\n", - " The default note aligns over here.\n", - " But you can move it to the middle! \n", - "Or over here! \n" - ] - } - ], - "source": [ - "print(table2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that these changes will apply to all tables you print.\n", - "\n", - "# Statistical Models\n", - "\n", - "Finally, `statstables` has some support for creating tables from the models in the `statsmodels` and `linearmodels` packages." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import statsmodels.formula.api as smf" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Optimization terminated successfully.\n", - " Current function value: 0.670993\n", - " Iterations 4\n" - ] - } - ], - "source": [ - "mod1 = smf.ols(\"A ~ B + C -1\", data=df).fit()\n", - "mod2 = smf.ols(\"A ~ B + C\", data=df).fit()\n", - "mod3 = smf.probit(\"binary ~ A + B + C\", data=df).fit()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - 
"\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
(1)(2)(3)
Intercept-0.025-0.284***
(0.076)(0.100)
B-0.029-0.0260.070**
(0.019)(0.021)(0.028)
C0.0060.0150.123*
(0.047)(0.054)(0.070)
Observations300300300
R20.0080.005
Pseudo R20.030
F Statistic1.2550.778
ModelOLSOLSProbit
*p<0.1, **p<0.05, ***p<0.01
" - ], - "text/plain": [ - "------------------------------------------------------------------------------------------\n", - "+ (1) (2) (3) +\n", - "+========================================================================================+\n", - "+ Intercept -0.025 -0.284*** +\n", - "+ (0.076) (0.100) +\n", - "+ B -0.029 -0.026 0.070** +\n", - "+ (0.019) (0.021) (0.028) +\n", - "+ C 0.006 0.015 0.123* +\n", - "+ (0.047) (0.054) (0.070) +\n", - "==========================================================================================\n", - "+ Observations 300 300 300 +\n", - "+ R² 0.008 0.005 +\n", - "+ Pseudo R² 0.030 +\n", - "+ F Statistic 1.255 0.778 +\n", - "+ Model OLS OLS Probit +\n", - "------------------------------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 " - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mod_table = tables.ModelTable(models=[mod1, mod2, mod3])\n", - "mod_table.table_params[\"show_model_numbers\"] = True\n", - "mod_table.parameter_order([\"Intercept\", \"B\", \"C\"])\n", - "mod_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using the basic [IV example](https://bashtage.github.io/linearmodels/iv/examples/basic-examples.html) from the LinearModels library:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " 
\n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
OLS 2SLS
First Stage Second Stage
(1)(2)(3)
Intercept-0.18510.237***0.441
(0.185)(0.275)(0.445)
Father Education0.269***
(0.029)
Education0.109***0.059*
(0.014)(0.035)
Observations428428428
R20.1180.1730.093
F Statistic57.196***89.258***2.849*
ModelOLSOLSIV-2SLS
*p<0.1, **p<0.05, ***p<0.01
" - ], - "text/plain": [ - "----------------------------------------------------------------------------------------------\n", - "+ OLS 2SLS +\n", - "+ -------------------- ------------------------------------------ +\n", - "+ First Stage Second Stage +\n", - "+ (1) (2) (3) +\n", - "+============================================================================================+\n", - "+ Intercept -0.185 10.237*** 0.441 +\n", - "+ (0.185) (0.275) (0.445) +\n", - "+ Father Education 0.269*** +\n", - "+ (0.029) +\n", - "+ Education 0.109*** 0.059* +\n", - "+ (0.014) (0.035) +\n", - "==============================================================================================\n", - "+ Observations 428 428 428 +\n", - "+ R² 0.118 0.173 0.093 +\n", - "+ F Statistic 57.196*** 89.258*** 2.849* +\n", - "+ Model OLS OLS IV-2SLS +\n", - "----------------------------------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from linearmodels.datasets import mroz\n", - "from linearmodels.iv import IV2SLS\n", - "from statsmodels.api import add_constant\n", - "\n", - "data = mroz.load()\n", - "data = data.dropna()\n", - "data = add_constant(data, has_constant=\"add\")\n", - "\n", - "res_ols = IV2SLS(np.log(data.wage), data[[\"const\", \"educ\"]], None, None).fit(\n", - " cov_type=\"unadjusted\"\n", - ")\n", - "res_second = IV2SLS(np.log(data.wage), data[[\"const\"]], data.educ, data.fatheduc).fit(\n", - " cov_type=\"unadjusted\"\n", - ")\n", - "\n", - "ivtable = tables.ModelTable(\n", - " models=[res_ols, res_second.first_stage.individual[\"educ\"], res_second]\n", - ")\n", - "ivtable.rename_covariates(\n", - " {\n", - " \"const\": \"Intercept\",\n", - " \"educ\": \"Education\",\n", - " \"fatheduc\": \"Father Education\",\n", - " }\n", - ")\n", - "ivtable.parameter_order([\"const\", \"fatheduc\", \"educ\"])\n", - 
"ivtable.add_multicolumns([\"OLS\", \"2SLS\"], [1, 2])\n", - "ivtable.add_multicolumns([\"\", \"First Stage\", \"Second Stage\"], [1] * 3, underline=False)\n", - "ivtable" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------\n", - "+ OLS 2SLS +\n", - "+ -------------- ------------------------------ +\n", - "+ First Stage Second Stage +\n", - "+ (1) (2) (3) +\n", - "+====================================================================+\n", - "+ Intercept -0.185 10.237*** 0.441 +\n", - "+ (0.185) (0.275) (0.445) +\n", - "+ Father Education 0.269*** +\n", - "+ (0.029) +\n", - "+ Education 0.109*** 0.059* +\n", - "+ (0.014) (0.035) +\n", - "======================================================================\n", - "+ Observations 428 428 428 +\n", - "+ R² 0.118 0.173 0.093 +\n", - "+ F Statistic 57.196*** 89.258*** 2.849* +\n", - "+ Model OLS OLS IV-2SLS +\n", - "----------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 \n" - ] - } - ], - "source": [ - "st.STParams[\"ascii_padding\"] = 2\n", - "ivtable.index_alignment = \"c\"\n", - "print(ivtable)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "ivtable.render_latex(outfile=\"ivtable.tex\", only_tabular=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And the `linearmodels` panel data example" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------------------------------------\n", - "+ Dependent Variable: Log(Wage) +\n", - "+ ---------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - 
"+======================================================================+\n", - "+ Intercept 0.092 0.023 1.871*** +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067*** 0.106*** +\n", - "+ (0.014) (0.015) +\n", - "+ Experience Squared -0.002*** -0.005*** -0.005*** +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182*** 0.106*** 0.080*** +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108*** 0.064*** 0.047** +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139*** -0.139*** +\n", - "+ (0.024) (0.048) +\n", - "========================================================================\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", - "------------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 \n" - ] - } - ], - "source": [ - "from linearmodels.datasets import wage_panel\n", - "from linearmodels.panel import PooledOLS, RandomEffects, PanelOLS\n", - "\n", - "data = wage_panel.load()\n", - "year = pd.Categorical(data.year)\n", - "data = data.set_index([\"nr\", \"year\"])\n", - "data[\"year\"] = year\n", - "\n", - "data = wage_panel.load()\n", - "year = pd.Categorical(data.year)\n", - "data = data.set_index([\"nr\", \"year\"])\n", - "data[\"year\"] = year\n", - "exog_vars = [\"black\", \"hisp\", \"exper\", \"expersq\", \"married\", \"educ\", \"union\", \"year\"]\n", - "exog = add_constant(data[exog_vars])\n", - "pooled_mod = PooledOLS(data.lwage, exog).fit()\n", - "random_mod = RandomEffects(data.lwage, exog).fit()\n", - "exog_vars = [\n", - " \"expersq\",\n", - " \"union\",\n", - " \"married\",\n", - "]\n", - "panel_exog = add_constant(data[exog_vars])\n", - "panel_mod = PanelOLS(\n", - " data.lwage, panel_exog, entity_effects=True, time_effects=True\n", - ").fit()\n", - "panel_table = st.tables.ModelTable([pooled_mod, random_mod, panel_mod])\n", - 
"panel_table.dependent_variable_name = \"Log(Wage)\"\n", - "panel_table.rename_covariates(\n", - " {\n", - " \"const\": \"Intercept\",\n", - " \"exper\": \"Experience\",\n", - " \"expersq\": \"Experience Squared\",\n", - " \"union\": \"Union\",\n", - " \"married\": \"Married\",\n", - " \"black\": \"Black\",\n", - " }\n", - ")\n", - "panel_table.parameter_order([\"const\", \"exper\", \"expersq\", \"union\", \"married\", \"black\"])\n", - "print(panel_table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to add more models that are not currently directly supported by `statstables`, you can create a custom `ModelData` class for that model. See the examples in `statstables/modeltables.py` for examples of how to make that class. Once the class has been created, add it to the `st.SupportedModels` dictionary by doing:\n", - "\n", - "```python\n", - "import statstables as st\n", - "from yourmodelpackage import ModelOutputClass\n", - "\n", - "class CustomModelClass(st.modeltables.ModelData):\n", - " ...\n", - "\n", - "st.SupportedModels[ModelOutputClass] = CustomModelClass\n", - "```\n", - "where `ModelOutputClass` is the type of object returned after fitting the model.\n", - "\n", - "## Formatting\n", - "\n", - "In addition to specifying the number of significant digits and thousands separators, `statstables` allows the user to format the color and font style of cells in LaTex and HTML tables.\n", - "\n", - "Start by creating a table. As of version 0.0.14, you can directly pass parameters into the initializer of each table class." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
Dependent Variable: Log(Wage)
(1)(2)(3)
α0.0920.0231.871***
(0.078)(0.151)(0.038)
Experience0.067***0.106***
(0.014)(0.015)
Experience²-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
Union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
Married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
Black-0.139***-0.139***
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" - ], - "text/plain": [ - "------------------------------------------------------------------\n", - "+ Dependent Variable: Log(Wage) +\n", - "+ ---------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - "+================================================================+\n", - "+ α 0.092 0.023 1.871*** +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067*** 0.106*** +\n", - "+ (0.014) (0.015) +\n", - "+ Experience² -0.002*** -0.005*** -0.005*** +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182*** 0.106*** 0.080*** +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108*** 0.064*** 0.047** +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139*** -0.139*** +\n", - "+ (0.024) (0.048) +\n", - "==================================================================\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", - "------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 " - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "covariate_labels = {\n", - " # statstables will convert LaTeX to unicode when rendering HTML and ASCII tables\n", - " \"const\": r\"$\\alpha$\",\n", - " \"exper\": \"Experience\",\n", - " \"expersq\": \"Experience$^2$\",\n", - " \"union\": \"Union\",\n", - " \"married\": \"Married\",\n", - " \"black\": \"Black\",\n", - "}\n", - "covariate_order = [\"const\", \"exper\", \"expersq\", \"union\", \"married\", \"black\"]\n", - "panel_table = st.tables.ModelTable(\n", - " [pooled_mod, random_mod, panel_mod],\n", - " covariate_labels=covariate_labels,\n", - " covariate_order=covariate_order,\n", - " dependent_variable_name=\"Log(Wage)\",\n", - ")\n", - "panel_table" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "------------------------------------------------------------------\n", - "+ Dependent Variable: Log(Wage) +\n", - "+ ---------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - "+================================================================+\n", - "+ α 0.092 0.023 1.871*** +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067*** 0.106*** +\n", - "+ (0.014) (0.015) +\n", - "+ Experience² -0.002*** -0.005*** -0.005*** +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182*** 0.106*** 0.080*** +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108*** 0.064*** 0.047** +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139*** -0.139*** +\n", - "+ (0.024) (0.048) +\n", - "==================================================================\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", - "------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 \n" - ] - } - ], - "source": [ - "print(panel_table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use customized formatting, create a function that will return a dictionary indicating whether the text should be bold, italicized, or made a different color." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
Dependent Variable: Log(Wage)
(1)(2)(3)
α0.0920.0231.871***
(0.078)(0.151)(0.038)
Experience 0.067*** 0.106***
(0.014)(0.015)
Experience²-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
Union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
Married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
Black-0.139***-0.139***
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" - ], - "text/plain": [ - "------------------------------------------------------------------\n", - "+ Dependent Variable: Log(Wage) +\n", - "+ ---------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - "+================================================================+\n", - "+ α 0.092 0.023 1.871*** +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067*** 0.106*** +\n", - "+ (0.014) (0.015) +\n", - "+ Experience² -0.002*** -0.005*** -0.005*** +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182*** 0.106*** 0.080*** +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108*** 0.064*** 0.047** +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139*** -0.139*** +\n", - "+ (0.024) (0.048) +\n", - "==================================================================\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", - "------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def parameter_formatter(value: int | float | str, **kwargs) -> dict | str:\n", - " if isinstance(value, str):\n", - " return value\n", - " color = \"red\"\n", - " if value > 0:\n", - " color = \"green\"\n", - " if isinstance(value, str):\n", - " val = value\n", - " else:\n", - " val = f\"{value: 0.3f}\"\n", - " return {\"bold\": True, \"value\": val, \"italic\": True, \"color\": color}\n", - "\n", - "\n", - "panel_table.custom_formatters(\n", - " {\"exper\": parameter_formatter, \"expersq\": parameter_formatter}\n", - ")\n", - "panel_table.render_latex(outfile=\"wage_table.tex\")\n", - "panel_table" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "panel_table.render_html(outfile=\"panel_table.html\")" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "If you want to apply the formatter to every row and column, you can change the default formatter to a function, as shown here. Instances of the `ModelTable` and `MeanDifferencesTable` classes will pass a few keyword arguments into the formatting function so you should either include them as optional arguments or include `**kwargs` in your function.\n", - "\n", - "`ModelTable` passes the following arguments:\n", - "* `p_value`: float = the p-value for the parameter estimate\n", - "* `se`: float = the standard error of the parameter estimate\n", - "* `ci`: tuple(float, float) = a tuple containing the confidence interval of the parameter estimate\n", - "\n", - "`MeanDifferencesTable` passes:\n", - "* `p_value`: float = the p-value for the difference in means\n", - "* `se`: float = the standard error of the mean" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "def new_formatter(value, p_value=None, **kwargs):\n", - " \"\"\"\n", - " A custom formatter that will make all of the statistically significant parameters\n", - " bold and read in the table.\n", - " \"\"\"\n", - " if isinstance(value, str):\n", - " return value\n", - " bold = False\n", - " color = None\n", - " if isinstance(p_value, float):\n", - " if p_value <= 0.1:\n", - " bold = True\n", - " color = \"red\"\n", - " val = f\"{value:0.3f}\"\n", - " return {\"value\": val, \"bold\": bold, \"color\": color}\n", - "\n", - "\n", - "# clear all of the labels, formatters, parameter order, and other custom features\n", - "panel_table.reset_custom_features()\n", - "panel_table.default_formatter = new_formatter" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - 
" \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - 
"\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "
(1)(2)(3)
black-0.139***-0.139***
(0.024)(0.048)
const0.0920.0231.871***
(0.078)(0.151)(0.038)
educ0.091***0.092***
(0.005)(0.011)
exper0.067***0.106***
(0.014)(0.015)
expersq-0.002***-0.005***-0.005***
(0.001)(0.001)(0.001)
hisp0.0160.022
(0.021)(0.043)
married0.108***0.064***0.047**
(0.016)(0.017)(0.018)
union0.182***0.106***0.080***
(0.017)(0.018)(0.019)
year.19810.058*0.040
(0.030)(0.025)
year.19820.063*0.031
(0.033)(0.032)
year.19830.062*0.020
(0.037)(0.042)
year.19840.090**0.043
(0.040)(0.052)
year.19850.109**0.058
(0.043)(0.061)
year.19860.142***0.092
(0.046)(0.072)
year.19870.174***0.135*
(0.049)(0.082)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.459***68.409***27.959***
*p<0.1, **p<0.05, ***p<0.01
" - ], - "text/plain": [ - "------------------------------------------------------------------\n", - "+ (1) (2) (3) +\n", - "+================================================================+\n", - "+ black -0.139*** -0.139*** +\n", - "+ (0.024) (0.048) +\n", - "+ const 0.092 0.023 1.871*** +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ educ 0.091*** 0.092*** +\n", - "+ (0.005) (0.011) +\n", - "+ exper 0.067*** 0.106*** +\n", - "+ (0.014) (0.015) +\n", - "+ expersq -0.002*** -0.005*** -0.005*** +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ hisp 0.016 0.022 +\n", - "+ (0.021) (0.043) +\n", - "+ married 0.108*** 0.064*** 0.047** +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ union 0.182*** 0.106*** 0.080*** +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ year.1981 0.058* 0.040 +\n", - "+ (0.030) (0.025) +\n", - "+ year.1982 0.063* 0.031 +\n", - "+ (0.033) (0.032) +\n", - "+ year.1983 0.062* 0.020 +\n", - "+ (0.037) (0.042) +\n", - "+ year.1984 0.090** 0.043 +\n", - "+ (0.040) (0.052) +\n", - "+ year.1985 0.109** 0.058 +\n", - "+ (0.043) (0.061) +\n", - "+ year.1986 0.142*** 0.092 +\n", - "+ (0.046) (0.072) +\n", - "+ year.1987 0.174*** 0.135* +\n", - "+ (0.049) (0.082) +\n", - "==================================================================\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. 
Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459*** 68.409*** 27.959*** +\n", - "------------------------------------------------------------------\n", - "*p<0.1, **p<0.05, ***p<0.01 " - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "panel_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`statstables` can also make `longtables` in LaTex" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "fake = Faker()\n", - "Faker.seed(512)\n", - "np.random.seed(410)\n", - "names = [fake.name() for _ in range(100)]\n", - "x1 = np.random.randint(500, 10000, 100)\n", - "x2 = np.random.uniform(size=100)\n", - "longdata = pd.DataFrame({\"Names\": names, \"X1\": x1, \"X2\": x2})\n", - "longtable = st.tables.GenericTable(longdata, longtable=True, include_index=False)\n", - "longtable.render_latex(\"longtable.tex\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also make panel a panel table by combining two tables objects." 
- ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Panel A) Men \n", - "========================================\n", - "+ ID School +\n", - "+--------------------------------------+\n", - "+ Matthew Ortiz 1234 Texas +\n", - "+ Michael Costa 6789 UVA +\n", - "+ Samuel Johnson 1023 UMBC +\n", - "+ Dakota Snyder 5810 UGA +\n", - "+ Scott Mills 9182 Rice +\n", - "----------------------------------------\n", - "Panel B) Women \n", - "========================================================\n", - "+ ID School +\n", - "+------------------------------------------------------+\n", - "+ Erin Anderson 9183 Wake Forrest +\n", - "+ Michelle Zimmerman 5734 Emory +\n", - "+ Danielle King 1290 Texas +\n", - "+ Shannon Nelson 4743 UVA +\n", - "+ Stephanie Booth 8912 Columbia +\n", - "--------------------------------------------------------\n", - "\n" - ] - } - ], - "source": [ - "st.STParams[\"ascii_header_char\"] = \"=\"\n", - "# st.STParams[\"double_top_rule\"] = True\n", - "st.STParams[\"ascii_mid_rule_char\"] = \"-\"\n", - "fake = Faker()\n", - "Faker.seed(202)\n", - "panela_df = pd.DataFrame(\n", - " {\n", - " \"ID\": [1234, 6789, 1023, 5810, 9182],\n", - " \"School\": [\"Texas\", \"UVA\", \"UMBC\", \"UGA\", \"Rice\"],\n", - " },\n", - " index=[fake.name_male() for _ in range(5)],\n", - ")\n", - "panela = tables.GenericTable(\n", - " panela_df,\n", - " formatters={\"ID\": lambda x: f\"{x}\"},\n", - ")\n", - "panelb_df = pd.DataFrame(\n", - " {\n", - " \"ID\": [9183, 5734, 1290, 4743, 8912],\n", - " \"School\": [\"Wake Forrest\", \"Emory\", \"Texas\", \"UVA\", \"Columbia\"],\n", - " },\n", - " index=[fake.name_female() for _ in range(5)],\n", - ")\n", - "panelb = tables.GenericTable(panelb_df, formatters={\"ID\": lambda x: f\"{x}\"})\n", - "panel = tables.PanelTable([panela, panelb], [\"Men\", \"Women\"])\n", - "print(panel)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "panel.render_latex(\"panel.tex\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If the name you use for the dependent variable is particularly long, the resulting LaTex table might look a bit off with the last column being noticeably wider than the others. You can fix this by using the `tabularx` environment instead of `tabular`. You may also want to use this if you'd just like a wider table." - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - 
"\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - " \n", - " \n", - "
Dependent Variable: A Long Title That Would Look Odd
(1)(2)(3)
α0.0920.0231.871
(0.078)(0.151)(0.038)
Experience0.0670.106
(0.014)(0.015)
Experience²-0.002-0.005-0.005
(0.001)(0.001)(0.001)
Union0.1820.1060.080
(0.017)(0.018)(0.019)
Married0.1080.0640.047
(0.016)(0.017)(0.018)
Black-0.139-0.139
(0.024)(0.048)
Observations4,3604,3604,360
N. Groups545545545
R20.1890.1810.022
F Statistic72.45968.40927.959
" - ], - "text/plain": [ - "==================================================================\n", - "+ Dependent Variable: A Long Title That Would Look Odd +\n", - "+ -------------------------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - "+----------------------------------------------------------------+\n", - "+ α 0.092 0.023 1.871 +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067 0.106 +\n", - "+ (0.014) (0.015) +\n", - "+ Experience² -0.002 -0.005 -0.005 +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182 0.106 0.080 +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108 0.064 0.047 +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139 -0.139 +\n", - "+ (0.024) (0.048) +\n", - "------------------------------------------------------------------\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459 68.409 27.959 +\n", - "------------------------------------------------------------------" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "panel_table_long_name = st.tables.ModelTable(\n", - " [pooled_mod, random_mod, panel_mod],\n", - " covariate_labels=covariate_labels,\n", - " covariate_order=covariate_order,\n", - " dependent_variable_name=\"A Long Title That Would Look Odd\",\n", - " dependent_var_cover_index=True,\n", - " dependent_var_alignment=\"r\",\n", - " show_stars=False,\n", - " show_significance_levels=False,\n", - " use_tabularx=True,\n", - ")\n", - "panel_table_long_name.render_latex(outfile=\"wage_table_long_name.tex\")\n", - "panel_table_long_name" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "==================================================================\n", - "+ Dependent Variable: A Long Title That Would Look Odd +\n", - "+ 
-------------------------------------------------------------- +\n", - "+ (1) (2) (3) +\n", - "+----------------------------------------------------------------+\n", - "+ α 0.092 0.023 1.871 +\n", - "+ (0.078) (0.151) (0.038) +\n", - "+ Experience 0.067 0.106 +\n", - "+ (0.014) (0.015) +\n", - "+ Experience² -0.002 -0.005 -0.005 +\n", - "+ (0.001) (0.001) (0.001) +\n", - "+ Union 0.182 0.106 0.080 +\n", - "+ (0.017) (0.018) (0.019) +\n", - "+ Married 0.108 0.064 0.047 +\n", - "+ (0.016) (0.017) (0.018) +\n", - "+ Black -0.139 -0.139 +\n", - "+ (0.024) (0.048) +\n", - "------------------------------------------------------------------\n", - "+ Observations 4,360 4,360 4,360 +\n", - "+ N. Groups 545 545 545 +\n", - "+ R² 0.189 0.181 0.022 +\n", - "+ F Statistic 72.459 68.409 27.959 +\n", - "------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "print(panel_table_long_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "statstables-dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "metadata": { + "kernelspec": { + "display_name": "statstables-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 
} diff --git a/statstables/__init__.py b/statstables/__init__.py index 6e0ffce..68c0bbc 100644 --- a/statstables/__init__.py +++ b/statstables/__init__.py @@ -1,3 +1,4 @@ +from typing import Any from statstables import ( tables, renderers, @@ -7,18 +8,6 @@ cellformatting, ) from statstables.parameters import STParams -from statsmodels.base.wrapper import ResultsWrapper -from statsmodels.regression.linear_model import RegressionResultsWrapper -from statsmodels.discrete.discrete_model import ( - BinaryResultsWrapper, - PoissonResultsWrapper, -) -from linearmodels.iv.results import IVResults, OLSResults -from linearmodels.panel.results import ( - PanelEffectsResults, - PanelResults, - RandomEffectsResults, -) __all__ = [ "STParams", @@ -27,19 +16,69 @@ "modeltables", "renderers", "utils", - "ResultsWrapper", "parameters", "cellformatting", ] -SupportedModels = { - RegressionResultsWrapper: modeltables.StatsModelsData, - ResultsWrapper: modeltables.StatsModelsData, - BinaryResultsWrapper: modeltables.StatsModelsData, - PoissonResultsWrapper: modeltables.StatsModelsData, - IVResults: modeltables.LinearModelsData, - OLSResults: modeltables.LinearModelsData, - PanelEffectsResults: modeltables.LinearModelsData, - PanelResults: modeltables.LinearModelsData, - RandomEffectsResults: modeltables.LinearModelsData, -} + +class SupportedModelsClass(dict): + def __init__(self, models: dict): + super().__init__() + self.models = models + + def add_model(self, model_results_class: Any, output_class: Any) -> None: + """ + Add a custom model without giving it a default name + + Parameters + ---------- + model_results_class : Any + The model class you want to add + + Returns + ------- + None + """ + self[type(model_results_class)] = output_class + + @staticmethod + def _keyname(key: str): + return key.replace("", "") + + def __setitem__(self, key: str, value: Any): + msg = "Custom models must inherit from the ModelData class" + assert value.__base__ == modeltables.ModelData, msg + 
self.models[key] = value + + def __getitem__(self, key: str): + # custom models will be saved with their type as the key, but the natively + # supported models are passed in as strings (see initialization below) + # so they will be found in the first exception + try: + return self.models[type(key)] + except KeyError: + try: + return self.models[self._keyname(str(key))] + except KeyError: + msg = ( + f"{key} is unsupported. To use custom models, " + "add them to the `st.SupportedModels` dictionary." + ) + raise KeyError(msg) + + +SupportedModels = SupportedModelsClass( + { + "statsmodels.regression.linear_model.RegressionResultsWrapper": modeltables.StatsModelsData, + "statsmodels.base.wrapper.ResultsWrapper": modeltables.StatsModelsData, + "statsmodels.discrete.discrete_model.BinaryResultsWrapper": modeltables.StatsModelsData, + "statsmodels.discrete.discrete_model.PoissonResultsWrapper": modeltables.StatsModelsData, + "linearmodels.iv.results.IVResults": modeltables.LinearModelsData, + "linearmodels.iv.results.OLSResults": modeltables.LinearModelsData, + "linearmodels.panel.results.PanelEffectsResults": modeltables.LinearModelsData, + "linearmodels.panel.results.PanelResults": modeltables.LinearModelsData, + "linearmodels.panel.results.RandomEffectsResults": modeltables.LinearModelsData, + "pyfixest.estimation.feols_.Feols": modeltables.PyFixestModel, + "pyfixest.estimation.fepois_.Fepois": modeltables.PyFixestModel, + } +) diff --git a/statstables/modeltables.py b/statstables/modeltables.py index ed59929..5efd351 100644 --- a/statstables/modeltables.py +++ b/statstables/modeltables.py @@ -1,28 +1,8 @@ # Tables that can be used to export model information +import numpy as np from abc import ABC, abstractmethod -from typing import Any, TypeAlias +from typing import Any from dataclasses import dataclass -from statsmodels.base.wrapper import ResultsWrapper -from statsmodels.regression.linear_model import RegressionResultsWrapper -from 
statsmodels.discrete.discrete_model import BinaryResultsWrapper -from linearmodels.iv.results import IVResults, OLSResults -from linearmodels.panel.results import ( - PanelEffectsResults, - PanelResults, - RandomEffectsResults, -) - -ModelTypes: TypeAlias = ( - ResultsWrapper - | RegressionResultsWrapper - | IVResults - | OLSResults - | PanelEffectsResults - | PanelResults - | RandomEffectsResults - | BinaryResultsWrapper - | Any -) # model stats that should always be formatted as integers INT_VARS = ["observations", "ngroups"] @@ -30,7 +10,7 @@ @dataclass class ModelData(ABC): - model: ModelTypes + model: Any def __post_init__(self): self.data = {} @@ -153,7 +133,37 @@ def pull_params(self) -> None: self.data["dependent_variable"] = self.model.summary.tables[0].data[0][1] self.data["fstat"] = self.model.f_statistic.stat self.data["fstat_pvalue"] = self.model.f_statistic.pval - if isinstance( - self.model, (PanelEffectsResults, RandomEffectsResults, PanelResults) - ): + if self.model.__class__.__name__ in [ + "PanelEffectsResults", + "RandomEffectsResults", + "PanelResults", + ]: self.data["ngroups"] = self.model.entity_info.total + + +PYFIXEST_MAP = {"observations": "_N", "r2": "_r2"} + + +@dataclass +class PyFixestModel(ModelData): + def __post_init__(self): + super().__post_init__() + + def pull_params(self): + for info, attr in PYFIXEST_MAP.items(): + try: + val = getattr(self.model, attr) + if np.isnan(val): + val = "" + self.data[info] = val + except AttributeError: + pass + params = self.model.coef() + self.data["params"] = params + self.data["param_labels"] = set(params.index.values) + self.data["sterrs"] = self.model.se() + confint = self.model.confint() + self.data["cis_low"] = confint["2.5%"] + self.data["cis_high"] = confint["97.5%"] + self.data["dependent_variable"] = self.model._depvar + self.data["pvalues"] = self.model.pvalue() diff --git a/statstables/parameters.py b/statstables/parameters.py index 8b2952c..2704a8c 100644 --- 
a/statstables/parameters.py +++ b/statstables/parameters.py @@ -185,7 +185,7 @@ def _validate_param(self, name, value): "show_model_numbers": True, "p_values": [0.1, 0.05, 0.01], "show_stars": True, - "show_model_type": True, + "show_model_type": False, "dependent_variable": "", "include_index": True, "show_significance_levels": True, diff --git a/statstables/tables.py b/statstables/tables.py index 75fb79d..85e0d61 100644 --- a/statstables/tables.py +++ b/statstables/tables.py @@ -1244,15 +1244,8 @@ def __init__( dep_vars = [] # pull the parameters from each model for mod in models: - try: - mod_obj = st.SupportedModels[type(mod)](mod) - self.models.append(mod_obj) - except KeyError as e: - msg = ( - f"{type(mod)} is unsupported. To use custom models, " - "add them to the `st.SupportedModels` dictionary." - ) - raise KeyError(msg) from e + mod_obj = st.SupportedModels[type(mod)](mod) + self.models.append(mod_obj) self.params.update(mod_obj.param_labels) dep_vars.append(mod_obj.dependent_variable) @@ -1585,7 +1578,7 @@ def __init__( assert panel_label_alignment in self.VALID_ALIGNMENTS self.panel_label_alignment = panel_label_alignment - def render_latex(self, outfile) -> str | None: + def render_latex(self, outfile, **kwargs) -> str | None: # assign multicolumns to each table match self.enumerate_type: case "alpha_upper": diff --git a/statstables/tests/test_tables.py b/statstables/tests/test_tables.py index a5d9733..07e7146 100644 --- a/statstables/tests/test_tables.py +++ b/statstables/tests/test_tables.py @@ -2,16 +2,21 @@ Tests implementation of tables """ +import copy import pytest import pandas as pd import numpy as np import statsmodels.formula.api as smf +import pyfixest as pf +from linearmodels.datasets import mroz, wage_panel +from linearmodels.iv import IV2SLS +from linearmodels.panel import PooledOLS, RandomEffects, PanelOLS from statsmodels.api import add_constant -from statstables import tables +from statstables import tables, modeltables, 
SupportedModels from faker import Faker from pathlib import Path -from linearmodels.datasets import wage_panel -from linearmodels.panel import PooledOLS, RandomEffects, PanelOLS +from dataclasses import dataclass + CUR_PATH = Path(__file__).resolve().parent @@ -97,7 +102,7 @@ def test_mean_differences_table(data): assert table.table_params["include_index"] == True -def test_model_table(data): +def test_model_table_statsmodels(data): mod1 = smf.ols("A ~ B + C -1", data=data).fit() mod2 = smf.ols("A ~ B + C", data=data).fit() mod_table = tables.ModelTable(models=[mod1, mod2]) @@ -119,6 +124,88 @@ def test_model_table(data): assert binary_table.table_params["include_index"] == True +def test_model_table_linearmodels(): + """ + Test model table with linear models results + """ + # IV results + data = mroz.load() + data = data.dropna() + data = add_constant(data, has_constant="add") + iv = IV2SLS(np.log(data.wage), data[["const"]], data.educ, data.fatheduc).fit( + cov_type="unadjusted" + ) + ivtable = tables.ModelTable(models=[iv.first_stage.individual["educ"], iv]) + ivtable.rename_covariates( + { + "const": "Intercept", + "educ": "Education", + "fatheduc": "Father Education", + } + ) + ivtable.parameter_order(["const", "fatheduc", "educ"]) + + # panel and random effects models + data = wage_panel.load() + year = pd.Categorical(data.year) + data = data.set_index(["nr", "year"]) + data["year"] = year + exog_vars = [ + "black", + "hisp", + "exper", + "expersq", + "married", + "educ", + "union", + "year", + ] + exog = add_constant(data[exog_vars]) + pooled_mod = PooledOLS(data.lwage, exog).fit() + random_mod = RandomEffects(data.lwage, exog).fit() + exog_vars = [ + "expersq", + "union", + "married", + ] + panel_exog = add_constant(data[exog_vars]) + panel_mod = PanelOLS( + data.lwage, panel_exog, entity_effects=True, time_effects=True + ).fit() + panel_table = tables.ModelTable([pooled_mod, random_mod, panel_mod]) + panel_table.dependent_variable_name = "Log(Wage)" + 
panel_table.rename_covariates( + { + "const": "Intercept", + "exper": "Experience", + "expersq": "Experience Squared", + "union": "Union", + "married": "Married", + "black": "Black", + } + ) + panel_table.parameter_order( + ["const", "exper", "expersq", "union", "married", "black"] + ) + + +def test_model_table_pyfixest(): + data = pf.get_data() + feols = pf.feols("Y ~ X1 | f1 + f2", data=data) + poisson_data = pf.get_data(model="Fepois") + fepois = pf.fepois("Y ~ X1 + X2 | f1 + f2", data=poisson_data) + pyfixest_table = tables.ModelTable([feols, fepois]) + temp_path = Path("pyfixest_tables_actual.tex") + expected_path = Path(CUR_PATH, "..", "..", "pyfixest_tables.tex") + compare_expected_output( + expected_file=expected_path, + actual_table=pyfixest_table, + render_type="tex", + temp_file=temp_path, + only_tabular=True, + ) + + def test_long_table(): fake = Faker() Faker.seed(512) @@ -231,17 +318,94 @@ def test_linear_models(): ) +def test_custom_model_table(data): + # test adding a custom model + @dataclass + class CustomTable(modeltables.ModelData): + def __post_init__(self): + super().__post_init__() + ... + + def pull_params(self): + pass + + SupportedModels["custom_model"] = CustomTable + + # test bad model instance + @dataclass + class BadCustomTable: + def __post__init(self): ... 
+ + with pytest.raises(AssertionError): + SupportedModels["bad_model"] = BadCustomTable + + # create a fake custom class by wrapping statsmodels results to test + PARAMETER_MAP = { + "params": "params", + "sterrs": "bse", + "r2": "rsquared", + "pvalues": "pvalues", + "adjusted_r2": "rsquared_adj", + "fstat": "fvalue", + "fstat_pvalue": "f_pvalue", + "observations": "nobs", + "dof_model": "df_model", + "dof_resid": "df_resid", + } + + class ModelWrapper: + """ + Wraps the statsmodels results to use as an example for the test + """ + + def __init__(self, result): + for _, attr in PARAMETER_MAP.items(): + setattr(self, attr, getattr(result, attr)) + self.conf_int = result.conf_int() + self.endog_names = result.model.endog_names + + @dataclass + class CustomResults(modeltables.ModelData): + def __post_init__(self): + super().__post_init__() + + def pull_params(self): + for info, attr in PARAMETER_MAP.items(): + try: + self.data[info] = getattr(self.model, attr) + except AttributeError: + pass + self.data["param_labels"] = set(self.model.params.index.values) + self.data["cis_low"] = self.model.conf_int[0] + self.data["cis_high"] = self.model.conf_int[1] + self.data["dependent_variable"] = self.model.endog_names + + mod = ModelWrapper(smf.ols("A ~ B + C", data=data).fit()) + SupportedModels.add_model(ModelWrapper, CustomResults) + table = tables.ModelTable([mod]) + table.render_latex() + table.render_html() + table.render_ascii() + + def compare_expected_output( - expected_file: Path, actual_table: tables.Table, render_type: str, temp_file: Path + expected_file: Path, + actual_table: tables.Table, + render_type: str, + temp_file: Path, + only_tabular: bool = False, ): match render_type: case "tex": - actual_table.render_latex(temp_file) + actual_table.render_latex(temp_file, only_tabular=only_tabular) actual_text = temp_file.read_text() expected_text = expected_file.read_text() - try: - assert actual_text == expected_text - temp_file.unlink() - except AssertionError as 
e: - msg = f"Output has changed. New output in {str(temp_file)}" - raise e(msg) + msg = f"Output has changed. New output in {str(temp_file)}" + assert actual_text == expected_text, msg + temp_file.unlink() + # try: + # assert actual_text == expected_text + # temp_file.unlink() + # except AssertionError as e: + # msg = f"Output has changed. New output in {str(temp_file)}" + # raise e(msg) diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 0000000..0bd689c --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,8 @@ +numpy +pandas +scipy +unicodeit +statsmodels +linearmodels +pyfixest +faker