-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAB_TESTING.py
More file actions
61 lines (45 loc) · 2.15 KB
/
AB_TESTING.py
File metadata and controls
61 lines (45 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
from scipy.stats import shapiro, levene, ttest_ind
# Console display settings: show every column, keep wide frames on a single
# line, and render floats with five decimal places.
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
    ('display.float_format', lambda value: '%.5f' % value),
):
    pd.set_option(_option_name, _option_value)

# Both groups live in the same workbook, one sheet per group.
_workbook_path = "ab_testing.xlsx"
dataframe_control = pd.read_excel(_workbook_path, sheet_name="Control Group")
dataframe_test = pd.read_excel(_workbook_path, sheet_name="Test Group")

# Work on copies so the frames as loaded stay untouched.
df_control, df_test = dataframe_control.copy(), dataframe_test.copy()
def check_df(dataframe, head=5):
    """Print a quick structural overview of ``dataframe`` to stdout.

    Sections are printed in this order: shape, column dtypes, first rows,
    last rows, per-column missing-value counts, and selected quantiles
    (0, 5, 50, 95, 99 and 100 percent) of the numeric columns.

    Args:
        dataframe: The pandas DataFrame to summarise.
        head: Number of rows shown in both the "Head" and the "Tail"
            section.

    Returns:
        None.
    """
    print("##################### Shape #####################")
    print(dataframe.shape)
    print("##################### Types #####################")
    print(dataframe.dtypes)
    print("##################### Head #####################")
    # Honour the caller's row count instead of always showing the default.
    print(dataframe.head(head))
    print("##################### Tail #####################")
    print(dataframe.tail(head))
    print("##################### NA #####################")
    print(dataframe.isnull().sum())
    print("##################### Quantiles #####################")
    # Quantiles are only defined for numeric data; restricting the frame
    # first keeps the summary usable when text columns are present.
    numeric_columns = dataframe.select_dtypes(include="number")
    print(numeric_columns.quantile([0, 0.05, 0.50, 0.95, 0.99, 1]).T)
# Print the structural summary of each group before any column is added.
for _group_frame in (df_control, df_test):
    check_df(_group_frame)

# Tag every row with the group it came from so both groups can share a frame.
df_control["group"] = "control"
df_test["group"] = "test"

# Bare expressions: their values are only displayed in an interactive session.
df_control.head()
df_test.head()

# Stack the two groups vertically under a fresh 0..n-1 index.
df = pd.concat(
    [df_control, df_test],
    axis=0,
    ignore_index=True,
)

# Mean purchase per group (also only displayed interactively).
df.groupby("group").agg(
    {"Purchase": "mean"}
)
# Purchase values of each group, selected once and reused by every test below.
control_purchase = df.loc[df["group"] == "control", "Purchase"]
test_purchase = df.loc[df["group"] == "test", "Purchase"]

# Conventional significance level used to pick the t-test variant below;
# adjust here if the analysis calls for a different threshold.
alpha = 0.05

# Normality check (Shapiro-Wilk), run separately for each group.
# NOTE(review): the t-test below runs regardless of these two results; if
# either p-value falls under alpha a non-parametric test may be more suitable.
test_stat, pvalue = shapiro(control_purchase)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))
# Recorded result: p-value = 0.5891
test_stat, pvalue = shapiro(test_purchase)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))
# Recorded result: p-value = 0.1541

# Variance homogeneity check (Levene) across the two groups.
test_stat, pvalue = levene(control_purchase, test_purchase)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))
# Recorded result: p-value = 0.1083

# Let the Levene outcome choose the t-test variant instead of hard-coding it:
# pooled variance when equal variances were not rejected, Welch's otherwise.
# With the recorded Levene p-value of 0.1083 this still gives equal_var=True.
equal_variances = bool(pvalue >= alpha)

# Independent two-sample t-test on the group means.
test_stat, pvalue = ttest_ind(control_purchase,
                              test_purchase,
                              equal_var=equal_variances)
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))
# Recorded result: p-value = 0.3493