From e1cb733bcb57e441b173760c67ee46283e42fa64 Mon Sep 17 00:00:00 2001 From: contractorwolf Date: Fri, 6 Apr 2018 15:15:07 -0400 Subject: [PATCH 1/7] changed default sprite size to 32 to work better with large datasets --- facets_dive/Dive_demo.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index 8651710..d073cea 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -33,7 +33,7 @@ "data": { "text/html": [ "\n", - " \n", + " \n", " " ], "text/plain": [ @@ -51,15 +55,26 @@ "# Display the Dive visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", + "# Create Facets template \n", "HTML_TEMPLATE = \"\"\"\n", - " \n", + " \n", " \"\"\"\n", - "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", + "\n", + "# Load the json dataset and the sprite_size into the template\n", + "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n", + "\n", + "# Display the template\n", "display(HTML(html))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -78,7 +93,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3" + "version": "3.5.2" } }, "nbformat": 4, From afcaf46b2510df535e8ea87d8aee69c22001bc33 Mon Sep 17 00:00:00 2001 From: james wolf Date: Tue, 17 Apr 2018 10:54:37 -0400 Subject: [PATCH 3/7] testing what breaks githubs ability to display this notebook --- facets_dive/Dive_demo.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index 8624ccb..297a28b 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -22,7 +22,8 @@ "\n", "# set the sprite_size based on the number of records in dataset,\n", "# larger datasets can crash the browser if the size is too large (>50000)\n", - "sprite_size 
= 32 if len(df.index)>50000 else 64\n", + "# sprite_size = 32 if len(df.index)>50000 else 64\n", + "sprite = 32\n", "\n", "jsonstr = df.to_json(orient='records')" ] @@ -93,7 +94,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.4" } }, "nbformat": 4, From b460d5cc1ba90e68503b3f69790c0bc6b09ea8f1 Mon Sep 17 00:00:00 2001 From: james wolf Date: Tue, 17 Apr 2018 10:59:18 -0400 Subject: [PATCH 4/7] testing github display issues --- facets_dive/Dive_demo.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index 297a28b..a198bc7 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -23,7 +23,7 @@ "# set the sprite_size based on the number of records in dataset,\n", "# larger datasets can crash the browser if the size is too large (>50000)\n", "# sprite_size = 32 if len(df.index)>50000 else 64\n", - "sprite = 32\n", + "#sprite = 32\n", "\n", "jsonstr = df.to_json(orient='records')" ] @@ -58,13 +58,13 @@ "\n", "# Create Facets template \n", "HTML_TEMPLATE = \"\"\"\n", - " \n", + " \n", " \"\"\"\n", "\n", "# Load the json dataset and the sprite_size into the template\n", - "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n", + "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", "\n", "# Display the template\n", "display(HTML(html))" From ed21f56031d11ae758386b29bde0e7deaa0ce053 Mon Sep 17 00:00:00 2001 From: james wolf Date: Tue, 17 Apr 2018 11:01:14 -0400 Subject: [PATCH 5/7] testing github display issues2 --- facets_dive/Dive_demo.ipynb | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index a198bc7..eaaff61 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -11,21 +11,13 @@ "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", 
\"Education-Num\", \"Marital Status\",\n", " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", " \"Hours per week\", \"Country\", \"Target\"]\n", - "\n", - "# Load dataframe from external CSV and add header information\n", - "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", - " names=features, # name features for header row\n", - " sep=r'\\s*,\\s*', # separator used in this dataset\n", + "jsonstr = pd.read_csv(\n", + " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", + " names=features,\n", + " sep=r'\\s*,\\s*',\n", " engine='python',\n", - " skiprows=[0], # skip first row without data \n", - " na_values=\"?\") # add ? where data is missing\n", - "\n", - "# set the sprite_size based on the number of records in dataset,\n", - "# larger datasets can crash the browser if the size is too large (>50000)\n", - "# sprite_size = 32 if len(df.index)>50000 else 64\n", - "#sprite = 32\n", - "\n", - "jsonstr = df.to_json(orient='records')" + " skiprows=[0],\n", + " na_values=\"?\").to_json(orient='records')" ] }, { @@ -56,17 +48,13 @@ "# Display the Dive visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", - "# Create Facets template \n", "HTML_TEMPLATE = \"\"\"\n", - " \n", + " \n", " \"\"\"\n", - "\n", - "# Load the json dataset and the sprite_size into the template\n", "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", - "\n", - "# Display the template\n", "display(HTML(html))" ] }, From 4fbdd0e03643747666b7e54976ad1eaf4d1600fa Mon Sep 17 00:00:00 2001 From: james wolf Date: Tue, 17 Apr 2018 11:40:50 -0400 Subject: [PATCH 6/7] did conversion to HTML --- facets_dive/Dive_demo.html | 11851 +++++++++++++++++++++++++++++++++++ 1 file changed, 11851 insertions(+) create mode 100644 facets_dive/Dive_demo.html diff --git a/facets_dive/Dive_demo.html b/facets_dive/Dive_demo.html new file mode 100644 index 
0000000..842ae06 --- /dev/null +++ b/facets_dive/Dive_demo.html @@ -0,0 +1,11851 @@ + + + +Dive_demo + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
# Load the UCI census CSV (adult.test) and convert it to a JSON string for sending to the visualization
+import pandas as pd
+features = ["Age", "Workclass", "fnlwgt", "Education", "Education-Num", "Marital Status",
+            "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss",
+            "Hours per week", "Country", "Target"]  # column names handed to read_csv through names= below
+jsonstr = pd.read_csv(  # read the CSV and serialize it in one chained expression; no DataFrame variable is kept
+    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
+    names=features,  # use the list above as the column names
+    sep=r'\s*,\s*',  # regex separator: a comma together with any whitespace around it
+    engine='python',  # regex separators are handled by pandas' Python parsing engine
+    skiprows=[0],  # skip the first line of the file (row index 0)
+    na_values="?").to_json(orient='records')  # read "?" as missing; output is a JSON array with one object per row
+
+ +
+
+
+ +
+
+
+
In [2]:
+
+
+
# Display the Dive visualization for this data: fill the JSON into an HTML snippet and render it as cell output
+from IPython.core.display import display, HTML  # NOTE(review): newer IPython releases deprecate importing display from IPython.core.display in favour of IPython.display — confirm against the target IPython version
+
+HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html">
+        <facets-dive id="elem" height="600"></facets-dive>
+        <script>
+          var data = {jsonstr};
+          document.querySelector("#elem").data = data;
+        </script>"""  # {jsonstr} is the only str.format placeholder in this template
+html = HTML_TEMPLATE.format(jsonstr=jsonstr)  # splice the JSON text in as the JavaScript value assigned to `data`
+display(HTML(html))  # wrap the markup in an HTML object and show it as this cell's rich output
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+ + + + + + From 98c388987af9ac169ab88b675366c35e4cf4b72f Mon Sep 17 00:00:00 2001 From: james wolf Date: Tue, 17 Apr 2018 11:50:06 -0400 Subject: [PATCH 7/7] set the dynamic sprite size, again --- facets_dive/Dive_demo.ipynb | 41 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index eaaff61..6c883d5 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -11,21 +11,26 @@ "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", " \"Hours per week\", \"Country\", \"Target\"]\n", - "jsonstr = pd.read_csv(\n", - " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", - " names=features,\n", - " sep=r'\\s*,\\s*',\n", + "\n", + "# Load dataframe from external CSV and add header information\n", + "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", + " names=features, # name features for header row\n", + " sep=r'\\s*,\\s*', # separator used in this dataset\n", " engine='python',\n", - " skiprows=[0],\n", - " na_values=\"?\").to_json(orient='records')" + " skiprows=[0], # skip first row without data \n", + " na_values=\"?\") # add ? 
where data is missing\n", + "\n", + "# set the sprite_size based on the number of records in dataset,\n", + "# larger datasets can crash the browser if the size is too large (>50000)\n", + "sprite_size = 32 if len(df.index)>50000 else 64\n", + "\n", + "jsonstr = df.to_json(orient='records')\n" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "scrolled": false - }, + "execution_count": 6, + "metadata": {}, "outputs": [ { "data": { @@ -48,14 +53,18 @@ "# Display the Dive visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", + "# Create Facets template \n", "HTML_TEMPLATE = \"\"\"\n", - " \n", + " \n", " \"\"\"\n", - "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", - "display(HTML(html))" + "\n", + "# Load the json dataset and the sprite_size into the template\n", + "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n", + "\n", + "# Display the template\n", + "display(HTML(html))\n" ] }, {