diff --git a/code/.ipynb_checkpoints/titanic-checkpoint.ipynb b/code/.ipynb_checkpoints/titanic-checkpoint.ipynb index 28e8028..be93097 100644 --- a/code/.ipynb_checkpoints/titanic-checkpoint.ipynb +++ b/code/.ipynb_checkpoints/titanic-checkpoint.ipynb @@ -9,28 +9,6 @@ "[Variable Descriptions](https://www.kaggle.com/c/titanic/data)." ] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "df = pd.read_csv('titanic.csv')" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -331,11 +309,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Write a function that takes N random samples of 100 passengers, and returns the fraction of those samples where at least 42 passengers survive.**\n", + "**Take random samples of 100 passengers and find out how many you need before the fraction of those samples where at least 42 passengers survive matches the probability you calculated previously (within Δp≈0.05).**\n", "\n", - "Choose a random seed and find approximately how many random samples you need to take before your fraction matches the probability you calculated (within Δp≈0.05Δp≈0.05).\n", + "Answers will vary based on chosen seeds. What would happen if you drew every sample with the same seed?\n", "\n", - "It may help to visualize the survival fraction vs the number of random samples. Answers will vary based on the seed." + "Plot the fraction of samples with at least 42 survivors vs. the number of random samples." ] }, { @@ -355,7 +333,7 @@ "- The ages of male and female survivors?\n", "- The fares paid by passengers from Queenstown and the passengers from Cherbourg?\n", "\n", - "**If so, at what level? 
If not, how do you know?**" + "**Use a 95% confidence level.**" ] }, { @@ -367,15 +345,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -392,20 +361,11 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Did survivors pay more for their tickets than those that did not? If so, to what significance level?**" + "**Did survivors pay more for their tickets than passengers who did not survive? Use a 95% confidence level.**" ] }, { @@ -421,7 +381,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Did a given first-class passenger have less family members on board than a given third-class passenger? If so, to what significance level?**" + "**Did a given first-class passenger have fewer family members on board than a given third-class passenger? Use a 95% confidence level.**" ] }, { diff --git a/code/titanic.ipynb b/code/titanic.ipynb index 28e8028..be93097 100644 --- a/code/titanic.ipynb +++ b/code/titanic.ipynb @@ -9,28 +9,6 @@ "[Variable Descriptions](https://www.kaggle.com/c/titanic/data)." 
] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "df = pd.read_csv('titanic.csv')" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -331,11 +309,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Write a function that takes N random samples of 100 passengers, and returns the fraction of those samples where at least 42 passengers survive.**\n", + "**Take random samples of 100 passengers and find out how many you need before the fraction of those samples where at least 42 passengers survive matches the probability you calculated previously (within Δp≈0.05).**\n", "\n", - "Choose a random seed and find approximately how many random samples you need to take before your fraction matches the probability you calculated (within Δp≈0.05Δp≈0.05).\n", + "Answers will vary based on chosen seeds. What would happen if you drew every sample with the same seed?\n", "\n", - "It may help to visualize the survival fraction vs the number of random samples. Answers will vary based on the seed." + "Plot the fraction of samples with at least 42 survivors vs. the number of random samples." ] }, { @@ -355,7 +333,7 @@ "- The ages of male and female survivors?\n", "- The fares paid by passengers from Queenstown and the passengers from Cherbourg?\n", "\n", - "**If so, at what level? 
If not, how do you know?**" + "**Use a 95% confidence level.**" ] }, { @@ -367,15 +345,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -392,20 +361,11 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Did survivors pay more for their tickets than those that did not? If so, to what significance level?**" + "**Did survivors pay more for their tickets than passengers who did not survive? Use a 95% confidence level.**" ] }, { @@ -421,7 +381,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Did a given first-class passenger have less family members on board than a given third-class passenger? If so, to what significance level?**" + "**Did a given first-class passenger have fewer family members on board than a given third-class passenger? Use a 95% confidence level.**" ] }, {