From 17090deb536065454e77bb02b51e7aef6ed57d09 Mon Sep 17 00:00:00 2001
From: sb1jan
Date: Thu, 14 Mar 2024 15:16:39 +0000
Subject: [PATCH] Added raw notebook

This is the first version of the notebook
---
 notebooks/covid_eda_raw.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/notebooks/covid_eda_raw.py b/notebooks/covid_eda_raw.py
index 6507b62..f67fc0f 100644
--- a/notebooks/covid_eda_raw.py
+++ b/notebooks/covid_eda_raw.py
@@ -4,18 +4,16 @@
 
 # COMMAND ----------
 
-!wget -q https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/hospitalizations/covid-hospitalizations.csv -O /tmp/covid-hospitalizations.csv
+# MAGIC %md #### Transform
 
 # COMMAND ----------
 
-# MAGIC %md #### Transform
+df = spark.sql("select * from aml_development.sample.covid_hospitalizations").toPandas()
+print(df.head())
 
 # COMMAND ----------
 
-import pandas as pd
-
 # read from /tmp, subset for USA, pivot and fill missing values
-df = pd.read_csv("/tmp/covid-hospitalizations.csv")
 df = df[df.iso_code == 'USA']\
      .pivot_table(values='value', columns='indicator', index='date')\
      .fillna(0)