diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb old mode 100755 new mode 100644 index c1bb43d..de12b45 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,12 +19,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "from scipy import stats\n", + "from scipy.stats import ttest_ind\n", + "from scipy.stats import ttest_rel" ] }, { @@ -38,11 +41,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + "\n", + "RangeIndex: 800 entries, 0 to 799\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 # 800 non-null int64 \n", + " 1 Name 800 non-null object\n", + " 2 Type 1 800 non-null object\n", + " 3 Type 2 414 non-null object\n", + " 4 Total 800 non-null int64 \n", + " 5 HP 800 non-null int64 \n", + " 6 Attack 800 non-null int64 \n", + " 7 Defense 800 non-null int64 \n", + " 8 Sp. Atk 800 non-null int64 \n", + " 9 Sp. 
Def 800 non-null int64 \n", + " 10 Speed 800 non-null int64 \n", + " 11 Generation 800 non-null int64 \n", + " 12 Legendary 800 non-null bool \n", + "dtypes: bool(1), int64(9), object(3)\n", + "memory usage: 75.9+ KB\n", + "None\n", + " # Total HP Attack Defense Sp. Atk \\\n", + "count 800.000000 800.00000 800.000000 800.000000 800.000000 800.000000 \n", + "mean 362.813750 435.10250 69.258750 79.001250 73.842500 72.820000 \n", + "std 208.343798 119.96304 25.534669 32.457366 31.183501 32.722294 \n", + "min 1.000000 180.00000 1.000000 5.000000 5.000000 10.000000 \n", + "25% 184.750000 330.00000 50.000000 55.000000 50.000000 49.750000 \n", + "50% 364.500000 450.00000 65.000000 75.000000 70.000000 65.000000 \n", + "75% 539.250000 515.00000 80.000000 100.000000 90.000000 95.000000 \n", + "max 721.000000 780.00000 255.000000 190.000000 230.000000 194.000000 \n", + "\n", + " Sp. Def Speed Generation \n", + "count 800.000000 800.000000 800.00000 \n", + "mean 71.902500 68.277500 3.32375 \n", + "std 27.828916 29.060474 1.66129 \n", + "min 20.000000 5.000000 1.00000 \n", + "25% 50.000000 45.000000 2.00000 \n", + "50% 70.000000 65.000000 3.00000 \n", + "75% 90.000000 90.000000 5.00000 \n", + "max 230.000000 180.000000 6.00000 \n", + " # Total HP Attack Defense \\\n", + "Legendary \n", + "False 353.315646 417.213605 67.182313 75.669388 71.559184 \n", + "True 470.215385 637.384615 92.738462 116.676923 99.661538 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation \n", + "Legendary \n", + "False 68.454422 68.892517 65.455782 3.284354 \n", + "True 122.184615 105.938462 100.184615 3.769231 \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/wj/9yy2qymx1q58s3n2gkl01vj00000gn/T/ipykernel_3956/559280183.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. 
Either specify numeric_only or select only columns which should be valid for the function.\n", + " legendary_vs_normal = data.groupby(\"Legendary\").mean()\n" + ] + } + ], "source": [ - "# Your code here:\n" + "data = pd.read_csv(\"pokemon.csv\")\n", + "data\n", + "print(data.head())\n", + "print(data.info())\n", + "print(data.describe())\n", + "legendary_vs_normal = data.groupby(\"Legendary\").mean()\n", + "print(legendary_vs_normal)" ] }, { @@ -58,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -75,7 +161,16 @@ " \"\"\"\n", " results = {}\n", "\n", - " # Your code here\n", + " for feature in features:\n", + " \n", + " s1_values = s1[feature]\n", + " s2_values = s2[feature]\n", + " \n", + " \n", + " t_statistic, p_value = ttest_ind(s1_values, s2_values)\n", + " \n", + " \n", + " results[feature] = p_value\n", " \n", " return results" ] @@ -101,11 +196,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 3.330647684846191e-15, 'Attack': 7.827253003205333e-24, 'Defense': 1.5842226094427255e-12, 'Sp. Atk': 6.314915770427266e-41, 'Sp. Def': 1.8439809580409594e-26, 'Speed': 2.3540754436898437e-21, 'Total': 3.0952457469652825e-52}\n" + ] + } + ], "source": [ - "# Your code here\n" + "legendary_pokemon = data[data['Legendary'] == True]\n", + "non_legendary_pokemon = data[data['Legendary'] == False]\n", + "\n", + "\n", + "results_legendary_vs_non_legendary = t_test_features(legendary_pokemon, non_legendary_pokemon)\n", + "\n", + "print(results_legendary_vs_non_legendary)" ] }, { @@ -121,7 +230,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "#The p-values are far below common significance levels (e.g., 0.05), indicating that the differences observed in the stats (HP, Attack, Defense, etc.) 
between Legendary and Non-Legendary Pokémon are unlikely to be due to random chance" ] }, { @@ -133,11 +242,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 0.13791881412813622, 'Attack': 0.24050968418101445, 'Defense': 0.5407630349194362, 'Sp. Atk': 0.14119788176331508, 'Sp. Def': 0.1678122623160639, 'Speed': 0.0028356954812578704, 'Total': 0.5599140649014442}\n" + ] + } + ], "source": [ - "# Your code here\n" + "generation_1_pokemon = data[data['Generation'] == 1]\n", + "generation_2_pokemon = data[data['Generation'] == 2]\n", + "results_gen1_vs_gen2 = t_test_features(generation_1_pokemon, generation_2_pokemon)\n", + "print(results_gen1_vs_gen2)" ] }, { @@ -153,7 +273,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# At a 0.05 significance level only Speed (p = 0.0028) differs significantly between Generation 1 and Generation 2; the p-values for HP, Attack, Defense, Sp. Atk, Sp. Def and Total are all above 0.05, so for those features we cannot reject the null hypothesis of equal means. A p-value is the probability of seeing a difference at least this large if the null hypothesis were true, so it's important to consider the significance level you choose and interpret the results accordingly" ] }, { @@ -165,11 +285,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 0.11060643144431842, 'Attack': 0.00015741395666164396, 'Defense': 3.250594205757004e-08, 'Sp. Atk': 0.0001454917404035147, 'Sp. 
Def': 0.00010893304795534396, 'Speed': 0.02405141079403746, 'Total': 1.1749035008828752e-07}\n" + ] + } + ], "source": [ - "# Your code here\n" + "single_type_pokemon = data[data['Type 2'].isnull()]\n", + "dual_type_pokemon = data[data['Type 2'].notnull()]\n", + "results_single_type_vs_dual_type = t_test_features(single_type_pokemon, dual_type_pokemon)\n", + "print(results_single_type_vs_dual_type)" ] }, { @@ -185,7 +316,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# At a 0.05 significance level every feature except HP (p = 0.11) differs significantly between single-type and dual-type Pokémon: Attack, Defense, Sp. Atk, Sp. Def, Speed and Total all have p-values below 0.05, so for them we reject the null hypothesis of equal means. A p-value is the probability of seeing a difference at least this large if the null hypothesis were true." ] }, { @@ -199,11 +330,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attack vs Defense:\n", + "T-Statistic: 4.325566393330478\n", + "P-Value: 1.7140303479358558e-05\n", + "\n", + "Sp. Atk vs Sp. Def:\n", + "T-Statistic: 0.853986188453353\n", + "P-Value: 0.3933685997548122\n" + ] + } + ], "source": [ - "# Your code here\n" + "attack_values = data['Attack']\n", + "defense_values = data['Defense']\n", + "attack_vs_defense_t_statistic, attack_vs_defense_p_value = ttest_rel(attack_values, defense_values)\n", + "\n", + "\n", + "sp_atk_values = data['Sp. Atk']\n", + "sp_def_values = data['Sp. Def']\n", + "sp_atk_vs_sp_def_t_statistic, sp_atk_vs_sp_def_p_value = ttest_rel(sp_atk_values, sp_def_values)\n", + "\n", + "\n", + "print(\"Attack vs Defense:\")\n", + "print(\"T-Statistic:\", attack_vs_defense_t_statistic)\n", + "print(\"P-Value:\", attack_vs_defense_p_value)\n", + "\n", + "print(\"\\nSp. Atk vs Sp. 
Def:\")\n", + "print(\"T-Statistic:\", sp_atk_vs_sp_def_t_statistic)\n", + "print(\"P-Value:\", sp_atk_vs_sp_def_p_value)" ] }, { @@ -219,13 +380,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Attack vs Defense: the p-value (about 1.7e-05) is well below 0.05, so we reject the null hypothesis and conclude that mean Attack and mean Defense differ significantly. Sp. Atk vs Sp. Def: the p-value (about 0.39) is above 0.05, so you don't have strong evidence to conclude that there is a significant difference between the Special Attack and Special Defense scores" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -239,7 +400,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb old mode 100755 new mode 100644 index 1f0e335..aadbbb3 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,280 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "from scipy import stats\n", + "from scipy.stats import ttest_ind\n", + "from scipy.stats import ttest_rel\n", + "from scipy.stats import f_oneway" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
..........................................
795719DiancieRockFairy60050100150100150506True
796719DiancieMega DiancieRockFairy700501601101601101106True
797720HoopaHoopa ConfinedPsychicGhost6008011060150130706True
798720HoopaHoopa UnboundPsychicDark6808016060170130806True
799721VolcanionFireWater6008011012013090706True
\n", + "

800 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + ".. ... ... ... ... ... .. ... ... \n", + "795 719 Diancie Rock Fairy 600 50 100 150 \n", + "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n", + "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n", + "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n", + "799 721 Volcanion Fire Water 600 80 110 120 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + ".. ... ... ... ... ... \n", + "795 100 150 50 6 True \n", + "796 160 110 110 6 True \n", + "797 150 130 70 6 True \n", + "798 170 130 80 6 True \n", + "799 130 90 70 6 True \n", + "\n", + "[800 rows x 13 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "# Load the data:\n", + "data = pd.read_csv(\"pokemon.csv\")\n", + "data" ] }, { @@ -58,12 +317,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n", - "\n", + "unique_types = pd.concat([data['Type 1'], data['Type 2']]).unique()\n", + " \n", "\n", "len(unique_types) # you should see 19" ] @@ -85,13 +355,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 5, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "for type in unique_types:\n", + " if isinstance(type, str): \n", + " total_for_type = data[data['Type 1'] == type]['Total'].sum()\n", + " total_for_type += data[data['Type 2'] == type]['Total'].sum()\n", + " pokemon_totals.append(total_for_type)\n", + "\n", "\n", "len(pokemon_totals) # you should see 18" ] @@ -111,11 +397,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ANOVA Statistic: 6.6175382960055344\n", + "P-value: 2.6457458815984803e-15\n" + ] + } + ], "source": [ - "# Your code here\n" + "type_totals = {type: [] for type in unique_types if isinstance(type, str)}\n", + "\n", + "\n", + "for _, row in data.iterrows():\n", + " if isinstance(row['Type 1'], str):\n", + " type_totals[row['Type 1']].append(row['Total'])\n", + " if isinstance(row['Type 2'], str) and row['Type 2'] != row['Type 1']:\n", + " type_totals[row['Type 2']].append(row['Total'])\n", + "\n", + "# Perform ANOVA test\n", + "anova_result = f_oneway(*type_totals.values())\n", + "\n", + "print(\"ANOVA Statistic:\", anova_result.statistic)\n", + "print(\"P-value:\", anova_result.pvalue)" ] }, { @@ -131,13 +439,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "#In hypothesis testing, a small p-value indicates strong evidence against the null hypothesis" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -151,7 +459,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,