Skip to content

Add support for duckreg #7

Draft
s3alfisc wants to merge 1 commit into main from
duckreg-support
Draft

Add support for duckreg #7
s3alfisc wants to merge 1 commit into main from
duckreg-support

Conversation

@s3alfisc
Copy link
Copy Markdown
Member

@s3alfisc s3alfisc commented Nov 20, 2025

Example:

import numpy as np
import pandas as pd
from duckreg.estimators import DuckRegression
import duckdb

# Generate sample data
def generate_sample_data(N=10_000_000, seed=12345):
    """Simulate a toy regression dataset as a pandas DataFrame.

    Parameters
    ----------
    N : int
        Number of rows to simulate.
    seed : int
        Seed passed to ``np.random.default_rng`` so the draw is reproducible.

    Returns
    -------
    pandas.DataFrame
        Columns ``Y``, ``Y2``, ``D``, ``f1``, ``f2`` and ``rowid``, where

        * ``D`` is drawn from ``{0, 1}``,
        * ``f1`` and ``f2`` are drawn (with replacement) from ``0..19``,
        * ``Y  =  D + f1 + 2 * f2 + standard normal noise``,
        * ``Y2 = -D + f1 + 2 * f2 + standard normal noise`` (independent noise),
        * ``rowid`` counts rows from ``0`` to ``N - 1``.
    """
    rng = np.random.default_rng(seed)

    # The order of the random draws below (treatment, covariates, noise for Y,
    # noise for Y2) determines the generated values for a given seed.
    treatment = rng.choice([0, 1], size=(N, 1))
    covariates = rng.choice(range(20), (N, 2), True)

    # Column vector of covariate coefficients: 1 for f1, 2 for f2.
    covariate_effect = covariates @ np.array([[1], [2]])

    outcome = treatment + covariate_effect + rng.normal(size=(N, 1))
    outcome_flipped = -1 * treatment + covariate_effect + rng.normal(size=(N, 1))

    stacked = np.hstack((outcome, outcome_flipped, treatment, covariates))
    frame = pd.DataFrame(stacked, columns=["Y", "Y2", "D", "f1", "f2"])
    frame["rowid"] = range(N)
    return frame


# Function to create and populate DuckDB database
def create_duckdb_database(df, db_name="large_dataset.db", table="data"):
    """Write ``df`` into a DuckDB database file, replacing any existing table.

    Parameters
    ----------
    df
        Data to store. It is referenced by name inside the SQL statement, so
        it must be an object DuckDB can scan from a Python variable
        (the example script passes a pandas DataFrame).
    db_name : str
        Path of the DuckDB database file to open.
    table : str
        Name of the table to (re)create.

    Notes
    -----
    ``table`` is interpolated into the SQL text unquoted, so only pass
    trusted identifiers.
    """
    conn = duckdb.connect(db_name)
    try:
        conn.execute(f"DROP TABLE IF EXISTS {table}")
        # The `df` in the query refers to the local Python variable of the
        # same name, so the parameter must stay named `df`.
        conn.execute(f"CREATE TABLE {table} AS SELECT * FROM df")
    finally:
        # Always release the database file handle, even if a statement fails.
        conn.close()
    print(f"Data loaded into DuckDB database: {db_name}")

# Generate and save data
df = generate_sample_data()
db_name = "large_dataset.db"
create_duckdb_database(df, db_name)

# Peek at the first rows to confirm the table was written. The connection is
# closed right after the query so it is not left open for the rest of the run.
conn = duckdb.connect(db_name)
query = "SELECT * FROM data limit 5"
try:
    preview = conn.execute(query).fetchdf()
finally:
    conn.close()
print(preview)

# Model 1: Y on D, f1 and f2.
m1 = DuckRegression(
    db_name=db_name,
    table_name="data",
    formula="Y ~ D + f1 + f2",
    cluster_col="f1",
    n_bootstraps=100,
    seed=42,
)
m1.fit()
m1.fit_vcov()
results = m1.summary()

# Model 2: same setup without f2. `results` is overwritten here, so only the
# second summary stays bound to that name.
m2 = DuckRegression(
    db_name=db_name,
    table_name="data",
    formula="Y ~ D + f1",
    cluster_col="f1",
    n_bootstraps=100,
    seed=42,
)
m2.fit()
m2.fit_vcov()
results = m2.summary()

# NOTE(review): `mt` is never imported in this snippet — presumably
# `import maketables as mt`; confirm and add the import before running.
table = mt.ETable([m2, m1], digits=6, caption="duckreg regressions")
table
image

@s3alfisc
Copy link
Copy Markdown
Member Author

cc @apoorvalal ;-)

@s3alfisc s3alfisc marked this pull request as draft November 20, 2025 21:20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant