-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAB_TESTING.py
More file actions
77 lines (50 loc) · 2.85 KB
/
AB_TESTING.py
File metadata and controls
77 lines (50 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#####################################################
# Data Preparation and Analysis
#####################################################
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, \
pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest
# Console display settings: show every column, keep wide frames on one line,
# cap printed rows at 10 and format floats with five decimals.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.expand_frame_repr", False),
    ("display.max_rows", 10),
    ("display.float_format", lambda x: "%.5f" % x),
):
    pd.set_option(option_name, option_value)

# Load the two experiment arms from their sheets of the same workbook
# (control first, then test).
df_control, df_test = (
    pd.read_excel("ab_testing.xlsx", sheet_name=sheet)
    for sheet in ("Control Group", "Test Group")
)

# Descriptive statistics per arm, one row per column.
# NOTE: these bare expressions only display a result in an interactive
# session; when the file is run as a script their values are discarded.
df_control.describe().transpose()
df_test.describe().transpose()

# Mean of the remaining metrics for each distinct Purchase value, per arm.
summary_cols = ["Impression", "Click", "Earning"]
df_control.groupby("Purchase")[summary_cols].mean()
df_test.groupby("Purchase")[summary_cols].mean()
# Tag every row with the arm it came from, then stack both arms into a single
# frame with a fresh 0..n-1 index.
df_control = df_control.assign(group="control")
df_test = df_test.assign(group="test")
df = pd.concat([df_control, df_test], ignore_index=True)
df
#####################################################
# Defining the Hypothesis of the A/B Test
#####################################################
# M1 and M2 are the mean Purchase values of the control and test groups.
# H0: M1 = M2  (rejected when p < 0.05)
# H1: M1 ≠ M2
# Compare the mean of the Purchase column between the two groups.
df["Purchase"].groupby(df["group"]).mean()
#####################################################
# Performing the Hypothesis Test
#####################################################
# Assumption checks, each evaluated at the 0.05 significance level:
#   Shapiro-Wilk  H0: the sample follows a normal distribution (p < 0.05 -> not normal)
#   Levene        H0: the group variances are homogeneous      (p < 0.05 -> not homogeneous)
ALPHA = 0.05

# Slice each group's Purchase values once instead of rebuilding the mask per test.
control_purchase = df.loc[df["group"] == "control", "Purchase"]
test_purchase = df.loc[df["group"] == "test", "Purchase"]

# Normality of the control group.
test_stat, pvalue = shapiro(control_purchase)
print("Test Stat = %.4f, p-value = %.4f" % (test_stat, pvalue))
control_is_normal = bool(pvalue >= ALPHA)

# Normality of the test group.
test_stat, pvalue = shapiro(test_purchase)
print("Test Stat = %.4f, p-value = %.4f" % (test_stat, pvalue))
test_is_normal = bool(pvalue >= ALPHA)

# Homogeneity of variance between the two groups.
test_stat, pvalue = levene(control_purchase, test_purchase)
print("Test Stat = %.4f, p-value = %.4f" % (test_stat, pvalue))
variances_are_homogeneous = bool(pvalue >= ALPHA)

# Pick the two-sample test from the assumption results rather than assuming them:
#   both groups normal + homogeneous variance -> Student's t-test (parametric)
#   both groups normal + unequal variance     -> Welch's t-test (equal_var=False)
#   otherwise                                 -> Mann-Whitney U (non-parametric)
# When both assumptions hold this is the same ttest_ind call as before.
if control_is_normal and test_is_normal:
    test_stat, pvalue = ttest_ind(control_purchase, test_purchase,
                                  equal_var=variances_are_homogeneous)
else:
    test_stat, pvalue = mannwhitneyu(control_purchase, test_purchase,
                                     alternative="two-sided")
print("Test Stat = %.4f, p-value = %.4f" % (test_stat, pvalue))

# Decision rule: reject H0 (equal Purchase means) when pvalue < ALPHA.
# The original analysis of ab_testing.xlsx recorded normal distributions,
# homogeneous variance, and no statistically significant difference between
# the purchase averages of the control and test groups (H0 not rejected).