-
Notifications
You must be signed in to change notification settings - Fork 30
Description
here is the code to correct it:
#@markdown ---
#@markdown ### Enter the trix url for the sheet file containing the Client Sales Data:
#@markdown The spreadsheet should contain the mandatory columns:
#@markdown * date: date in the format YYYY-MM-DD
#@markdown * geo: the number which identifies the geo
#@markdown * response: variable on which you want to measure incrementality
#@markdown (e.g. sales, transactions)
#@markdown * cost: variable used as spend proxy (e.g. ad spend)
#@markdown Other columns can be present in the spreadsheet.
#@markdown Spreadsheet URL containing the geo level response and spend data
client_sales_table = "https://docs.google.com/spreadsheets/d/1bXZEUasXxEqE-lE0HrBInedrdOt2zoS3TyVbkE-xQco/edit?gid=0#gid=0" #@param {type:"string"}
#@markdown Leave the following field empty if you don't want to add constraint to the geo_eligibility
geo_eligibility_table = "https://docs.google.com/spreadsheets/d/1A_J8XCqXPd4i6WJruZ_6WDHMwe7wxiILwAKhY-_GV1w/edit?gid=0#gid=0" #@param {type:"string"}
auth.authenticate_user()
creds, _ = google_auth.default()
gc = gspread.authorize(creds)
wks = gc.open_by_url(client_sales_table).sheet1
data = wks.get_all_values()
headers = data.pop(0)
geo_level_time_series = pd.DataFrame(data, columns=headers)
geo_level_time_series["date"] = pd.to_datetime(geo_level_time_series["date"])
for colname in ["response", "geo", "cost"]:
geo_level_time_series[colname] = pd.to_numeric(geo_level_time_series[colname])
num_geos = geo_level_time_series["geo"].nunique()
if not geo_eligibility_table:
geo_eligibility = None
else:
wks = gc.open_by_url(geo_eligibility_table).sheet1
data = wks.get_all_values()
headers = data.pop(0)
geo_eligibility = pd.DataFrame(data, columns=headers)
for colname in ["geo", "control", "treatment", "exclude"]:
geo_eligibility[colname] = pd.to_numeric(geo_eligibility[colname])
## build defaults for ALL geos (eligible for either if not in the sheet)
all_geos = (geo_level_time_series[["geo"]]
.drop_duplicates()
.assign(control=1, treatment=1, exclude=0))
geo_eligibility = all_geos.merge(
geo_eligibility[["geo","control","treatment","exclude"]],
on="geo", how="left", suffixes=("", "_set")
)
for col in ["control", "treatment", "exclude"]:
geo_eligibility[col] = (
geo_eligibility[f"{col}_set"]
.fillna(geo_eligibility[col]) # keep defaults (1/1/0) if blank
.fillna(0)
.astype(int)
)
geo_eligibility.drop(columns=[c for c in geo_eligibility.columns if c.endswith("_set")],
inplace=True)
geo_eligibility = geoeligibility.GeoEligibility(geo_eligibility)
geo_eligibility.data.index = (
pd.to_numeric(geo_eligibility.data.index, downcast="integer").astype(str)
)