Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 186 additions & 25 deletions your-code/challenge-1.ipynb
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# Import libraries\n",
"import pandas as pd"
"import pandas as pd\n",
"from scipy import stats\n",
"from scipy.stats import ttest_ind\n",
"from scipy.stats import ttest_rel"
]
},
{
Expand All @@ -38,11 +41,94 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" # Name Type 1 Type 2 Total HP Attack Defense \\\n",
"0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
"1 2 Ivysaur Grass Poison 405 60 62 63 \n",
"2 3 Venusaur Grass Poison 525 80 82 83 \n",
"3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
"4 4 Charmander Fire NaN 309 39 52 43 \n",
"\n",
" Sp. Atk Sp. Def Speed Generation Legendary \n",
"0 65 65 45 1 False \n",
"1 80 80 60 1 False \n",
"2 100 100 80 1 False \n",
"3 122 120 80 1 False \n",
"4 60 50 65 1 False \n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 800 entries, 0 to 799\n",
"Data columns (total 13 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 # 800 non-null int64 \n",
" 1 Name 800 non-null object\n",
" 2 Type 1 800 non-null object\n",
" 3 Type 2 414 non-null object\n",
" 4 Total 800 non-null int64 \n",
" 5 HP 800 non-null int64 \n",
" 6 Attack 800 non-null int64 \n",
" 7 Defense 800 non-null int64 \n",
" 8 Sp. Atk 800 non-null int64 \n",
" 9 Sp. Def 800 non-null int64 \n",
" 10 Speed 800 non-null int64 \n",
" 11 Generation 800 non-null int64 \n",
" 12 Legendary 800 non-null bool \n",
"dtypes: bool(1), int64(9), object(3)\n",
"memory usage: 75.9+ KB\n",
"None\n",
" # Total HP Attack Defense Sp. Atk \\\n",
"count 800.000000 800.00000 800.000000 800.000000 800.000000 800.000000 \n",
"mean 362.813750 435.10250 69.258750 79.001250 73.842500 72.820000 \n",
"std 208.343798 119.96304 25.534669 32.457366 31.183501 32.722294 \n",
"min 1.000000 180.00000 1.000000 5.000000 5.000000 10.000000 \n",
"25% 184.750000 330.00000 50.000000 55.000000 50.000000 49.750000 \n",
"50% 364.500000 450.00000 65.000000 75.000000 70.000000 65.000000 \n",
"75% 539.250000 515.00000 80.000000 100.000000 90.000000 95.000000 \n",
"max 721.000000 780.00000 255.000000 190.000000 230.000000 194.000000 \n",
"\n",
" Sp. Def Speed Generation \n",
"count 800.000000 800.000000 800.00000 \n",
"mean 71.902500 68.277500 3.32375 \n",
"std 27.828916 29.060474 1.66129 \n",
"min 20.000000 5.000000 1.00000 \n",
"25% 50.000000 45.000000 2.00000 \n",
"50% 70.000000 65.000000 3.00000 \n",
"75% 90.000000 90.000000 5.00000 \n",
"max 230.000000 180.000000 6.00000 \n",
" # Total HP Attack Defense \\\n",
"Legendary \n",
"False 353.315646 417.213605 67.182313 75.669388 71.559184 \n",
"True 470.215385 637.384615 92.738462 116.676923 99.661538 \n",
"\n",
" Sp. Atk Sp. Def Speed Generation \n",
"Legendary \n",
"False 68.454422 68.892517 65.455782 3.284354 \n",
"True 122.184615 105.938462 100.184615 3.769231 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wj/9yy2qymx1q58s3n2gkl01vj00000gn/T/ipykernel_3956/559280183.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" legendary_vs_normal = data.groupby(\"Legendary\").mean()\n"
]
}
],
"source": [
"# Your code here:\n"
"data = pd.read_csv(\"pokemon.csv\")\n",
"data\n",
"print(data.head())\n",
"print(data.info())\n",
"print(data.describe())\n",
"legendary_vs_normal = data.groupby(\"Legendary\").mean()\n",
"print(legendary_vs_normal)"
]
},
{
Expand All @@ -58,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -75,7 +161,16 @@
" \"\"\"\n",
" results = {}\n",
"\n",
" # Your code here\n",
" for feature in features:\n",
" \n",
" s1_values = s1[feature]\n",
" s2_values = s2[feature]\n",
" \n",
" \n",
" t_statistic, p_value = ttest_ind(s1_values, s2_values)\n",
" \n",
" \n",
" results[feature] = p_value\n",
" \n",
" return results"
]
Expand All @@ -101,11 +196,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 50,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': 3.330647684846191e-15, 'Attack': 7.827253003205333e-24, 'Defense': 1.5842226094427255e-12, 'Sp. Atk': 6.314915770427266e-41, 'Sp. Def': 1.8439809580409594e-26, 'Speed': 2.3540754436898437e-21, 'Total': 3.0952457469652825e-52}\n"
]
}
],
"source": [
"# Your code here\n"
"legendary_pokemon = data[data['Legendary'] == True]\n",
"non_legendary_pokemon = data[data['Legendary'] == False]\n",
"\n",
"\n",
"results_legendary_vs_non_legendary = t_test_features(legendary_pokemon, non_legendary_pokemon)\n",
"\n",
"print(results_legendary_vs_non_legendary)"
]
},
{
Expand All @@ -121,7 +230,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"#The p-values are far below common significance levels (e.g., 0.05), indicating that the differences observed in the stats (HP, Attack, Defense, etc.) between Legendary and Non-Legendary Pokémon are unlikely to be due to random chance"
]
},
{
Expand All @@ -133,11 +242,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 51,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': 0.13791881412813622, 'Attack': 0.24050968418101445, 'Defense': 0.5407630349194362, 'Sp. Atk': 0.14119788176331508, 'Sp. Def': 0.1678122623160639, 'Speed': 0.0028356954812578704, 'Total': 0.5599140649014442}\n"
]
}
],
"source": [
"# Your code here\n"
"generation_1_pokemon = data[data['Generation'] == 1]\n",
"generation_2_pokemon = data[data['Generation'] == 2]\n",
"results_gen1_vs_gen2 = t_test_features(generation_1_pokemon, generation_2_pokemon)\n",
"print(results_gen1_vs_gen2)"
]
},
{
Expand All @@ -153,7 +273,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"#In either case, the p-values provide a quantitative measure of the likelihood that the observed differences are due to random chance. It's important to consider the significance level you choose and interpret the results accordingly"
]
},
{
Expand All @@ -165,11 +285,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 52,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': 0.11060643144431842, 'Attack': 0.00015741395666164396, 'Defense': 3.250594205757004e-08, 'Sp. Atk': 0.0001454917404035147, 'Sp. Def': 0.00010893304795534396, 'Speed': 0.02405141079403746, 'Total': 1.1749035008828752e-07}\n"
]
}
],
"source": [
"# Your code here\n"
"single_type_pokemon = data[data['Type 2'].isnull()]\n",
"dual_type_pokemon = data[data['Type 2'].notnull()]\n",
"results_single_type_vs_dual_type = t_test_features(single_type_pokemon, dual_type_pokemon)\n",
"print(results_single_type_vs_dual_type)"
]
},
{
Expand All @@ -185,7 +316,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# p-values provide a quantitative measure of the likelihood that the observed differences are due to random chance."
]
},
{
Expand All @@ -199,11 +330,41 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 54,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Attack vs Defense:\n",
"T-Statistic: 4.325566393330478\n",
"P-Value: 1.7140303479358558e-05\n",
"\n",
"Sp. Atk vs Sp. Def:\n",
"T-Statistic: 0.853986188453353\n",
"P-Value: 0.3933685997548122\n"
]
}
],
"source": [
"# Your code here\n"
"attack_values = data['Attack']\n",
"defense_values = data['Defense']\n",
"attack_vs_defense_t_statistic, attack_vs_defense_p_value = ttest_rel(attack_values, defense_values)\n",
"\n",
"\n",
"sp_atk_values = data['Sp. Atk']\n",
"sp_def_values = data['Sp. Def']\n",
"sp_atk_vs_sp_def_t_statistic, sp_atk_vs_sp_def_p_value = ttest_rel(sp_atk_values, sp_def_values)\n",
"\n",
"\n",
"print(\"Attack vs Defense:\")\n",
"print(\"T-Statistic:\", attack_vs_defense_t_statistic)\n",
"print(\"P-Value:\", attack_vs_defense_p_value)\n",
"\n",
"print(\"\\nSp. Atk vs Sp. Def:\")\n",
"print(\"T-Statistic:\", sp_atk_vs_sp_def_t_statistic)\n",
"print(\"P-Value:\", sp_atk_vs_sp_def_p_value)"
]
},
{
Expand All @@ -219,13 +380,13 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# you don't have strong evidence to conclude that there is a significant difference between the Special Attack"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -239,7 +400,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.10.9"
}
},
"nbformat": 4,
Expand Down
Loading