diff --git a/your-code/__pycache__/data_check.cpython-313.pyc b/your-code/__pycache__/data_check.cpython-313.pyc
new file mode 100644
index 0000000..1b2f965
Binary files /dev/null and b/your-code/__pycache__/data_check.cpython-313.pyc differ
diff --git a/your-code/data_check.py b/your-code/data_check.py
new file mode 100644
index 0000000..46deb24
--- /dev/null
+++ b/your-code/data_check.py
@@ -0,0 +1,87 @@
+'''This file performs an initial exploration of the data and outputs important details,
+ such as the number of rows and columns, data types, unique values, categorical columns,
+ and checks for null and duplicated values.'''
+
+
+def initial_chk(data_frame, max_unique=20):
+    '''Print a first-pass overview of a DataFrame.
+
+    The report contains, in order:
+    - The number of columns and rows
+    - The data type of each column
+    - The count of unique values in each column
+    - The columns treated as categorical (fewer than ``max_unique`` unique values)
+    - How often each value occurs in every one of those categorical columns
+
+    Parameters:
+    - data_frame (pd.DataFrame): The input DataFrame to explore.
+    - max_unique (int): A column is reported as categorical when it has strictly
+      fewer unique values than this threshold. Defaults to 20.
+
+    Returns:
+    - None: This function prints the results directly and does not return a value.
+    '''
+
+    # Display the number of columns and rows
+    n_rows, n_cols = data_frame.shape
+    print(f"Number of columns: {n_cols} and rows: {n_rows}")
+
+    # Show data types of each column
+    print("\nData types:")
+    print(data_frame.dtypes)
+
+    # Display the unique values count for each column
+    print("\nUnique values count:")
+    unique_values_count = data_frame.nunique()
+    print(unique_values_count)
+
+    # Identify categorical columns (those with fewer than max_unique unique values)
+    categorical_columns = unique_values_count[unique_values_count < max_unique].index
+    print(f"\nThese columns appear to be categorical (less than {max_unique} unique values):\n{categorical_columns}")
+
+    # Show how often each value occurs in every categorical column
+    print("\nUnique value count for categorical columns:")
+    for col in categorical_columns:
+        print(f"\n{col}:")
+        print(data_frame[col].value_counts())
+
+
+def check_null(data_frame):
+    '''
+    Report how many null values each column contains.
+
+    Parameters:
+    - data_frame (pd.DataFrame): The input DataFrame to check for null values.
+
+    Returns:
+    - None: The per-column totals are printed; nothing is returned.
+    '''
+
+    print("\nCount of null values:")
+    # isnull() flags every missing cell; summing the flags gives one total per column.
+    nulls_per_column = data_frame.isnull().sum()
+    print(nulls_per_column)
+
+def check_duplicated(data_frame):
+    '''
+    Report how many rows of the data frame are duplicates of another row.
+
+    Parameters:
+    - data_frame (pd.DataFrame): The input DataFrame to check for duplicates.
+
+    Returns:
+    - None: The number of duplicated rows is printed; nothing is returned.
+    '''
+
+    print("\nCount of duplicated values:")
+    # duplicated() marks repeated rows with True; the sum of the marks is the count.
+    duplicate_flags = data_frame.duplicated()
+    print(duplicate_flags.sum())
+
+def check(data_frame):
+    '''
+    Run every exploration step of this module on the given data frame.
+
+    Parameters:
+    - data_frame (pd.DataFrame): The input DataFrame to explore.
+
+    Returns:
+    - None: Each step prints its own report; nothing is returned.
+    '''
+
+    # Order matters for the printed report: overview, then nulls, then duplicates.
+    for step in (initial_chk, check_null, check_duplicated):
+        step(data_frame)
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..de1af21 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import scipy.stats as std\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import data_check as ch\n"
]
},
{
@@ -35,12 +38,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Run this code:\n",
- "\n",
"pokemon = pd.read_csv('../pokemon.csv')"
]
},
@@ -53,12 +55,287 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of columns: 13 and rows: 800\n",
+ "\n",
+ "Data types:\n",
+ "# int64\n",
+ "Name object\n",
+ "Type 1 object\n",
+ "Type 2 object\n",
+ "Total int64\n",
+ "HP int64\n",
+ "Attack int64\n",
+ "Defense int64\n",
+ "Sp. Atk int64\n",
+ "Sp. Def int64\n",
+ "Speed int64\n",
+ "Generation int64\n",
+ "Legendary bool\n",
+ "dtype: object\n",
+ "\n",
+ "Unique values count:\n",
+ "# 721\n",
+ "Name 800\n",
+ "Type 1 18\n",
+ "Type 2 18\n",
+ "Total 200\n",
+ "HP 94\n",
+ "Attack 111\n",
+ "Defense 103\n",
+ "Sp. Atk 105\n",
+ "Sp. Def 92\n",
+ "Speed 108\n",
+ "Generation 6\n",
+ "Legendary 2\n",
+ "dtype: int64\n",
+ "\n",
+ "These columns appear to be categorical (less than 20 unique values):\n",
+ "Index(['Type 1', 'Type 2', 'Generation', 'Legendary'], dtype='object')\n",
+ "\n",
+ "Unique value count for categorical columns:\n",
+ "\n",
+ "Type 1:\n",
+ "Type 1\n",
+ "Water 112\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Rock 44\n",
+ "Electric 44\n",
+ "Ground 32\n",
+ "Ghost 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Type 2:\n",
+ "Type 2\n",
+ "Flying 97\n",
+ "Ground 35\n",
+ "Poison 34\n",
+ "Psychic 33\n",
+ "Fighting 26\n",
+ "Grass 25\n",
+ "Fairy 23\n",
+ "Steel 22\n",
+ "Dark 20\n",
+ "Dragon 18\n",
+ "Ice 14\n",
+ "Rock 14\n",
+ "Water 14\n",
+ "Ghost 14\n",
+ "Fire 12\n",
+ "Electric 6\n",
+ "Normal 4\n",
+ "Bug 3\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Generation:\n",
+ "Generation\n",
+ "1 166\n",
+ "5 165\n",
+ "3 160\n",
+ "4 121\n",
+ "2 106\n",
+ "6 82\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Legendary:\n",
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Count of null values:\n",
+ "# 0\n",
+ "Name 0\n",
+ "Type 1 0\n",
+ "Type 2 386\n",
+ "Total 0\n",
+ "HP 0\n",
+ "Attack 0\n",
+ "Defense 0\n",
+ "Sp. Atk 0\n",
+ "Sp. Def 0\n",
+ "Speed 0\n",
+ "Generation 0\n",
+ "Legendary 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Count of duplicated values:\n",
+ "0\n"
+ ]
+ }
+ ],
+ "source": [
+ "ch.check(pokemon)"
]
},
{
@@ -70,12 +347,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "legendary_counts = pokemon[\"Legendary\"].value_counts()\n",
+ "legendary_counts\n"
]
},
{
@@ -87,11 +379,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 417.213605 | \n",
+ " 106.760417 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 637.384615 | \n",
+ " 60.937389 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Legendary \n",
+ "False 417.213605 106.760417\n",
+ "True 637.384615 60.937389"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
+ "legendary_stats = pokemon.groupby(\"Legendary\")[\"Total\"].agg([\"mean\", \"std\"])\n",
+ "legendary_stats\n",
"\n"
]
},
@@ -106,12 +457,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): There is no significant difference between the means of the two groups\n",
+ "Alternate Hypothesis (H₁): The mean points of the two groups are not equal\n",
+ "Test Statistic (t): 25.83\n",
+ "P-Value: 0.000000000000000000000000000000000000000000000093579543359574\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"There is no significant difference between the means of the two groups\" # H₀\n",
+ "alt_hypothesis = \"The mean points of the two groups are not equal\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "legendary_t = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n",
+ "legendary_f = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n",
+ "\n",
+ "# Two-sample t-test for independent samples\n",
+ "t_stat, p_value = std.ttest_ind(legendary_t, legendary_f, equal_var=False)\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.60f}\")\n",
+ "print()\n"
]
},
{
@@ -123,12 +501,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean points are different for legendary and non-legendary Pokemons.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean points for legendary and non-legendary Pokemons are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean points are different for legendary and non-legendary Pokemons.\")\n"
]
},
{
@@ -140,12 +534,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Type 1\n",
+ "Water 112\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Rock 44\n",
+ "Electric 44\n",
+ "Ground 32\n",
+ "Ghost 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "type_counts = pokemon[\"Type 1\"].value_counts()\n",
+ "type_counts"
]
},
{
@@ -157,12 +583,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Water Pokémon Mean: 430.46 and Std Dev: 113.19\n",
+ "Non-Water Pokémon Mean: 435.86 and Std Dev: 121.09\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"]\n",
+ "non_water_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"]\n",
+ "\n",
+ "water_mean = water_pokemon.mean()\n",
+ "water_std = water_pokemon.std()\n",
+ "\n",
+ "non_water_mean = non_water_pokemon.mean()\n",
+ "non_water_std = non_water_pokemon.std()\n",
+ "\n",
+ "print(f\"Water Pokémon Mean: {water_mean:.2f} and Std Dev: {water_std:.2f}\")\n",
+ "print(f\"Non-Water Pokémon Mean: {non_water_mean:.2f} and Std Dev: {non_water_std:.2f}\")\n"
]
},
{
@@ -174,12 +620,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): There is no significant difference between the means of the two groups\n",
+ "Alternate Hypothesis (H₁): The mean points of the two groups are not equal\n",
+ "Test Statistic (t): -0.44\n",
+ "P-Value: 0.66\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"There is no significant difference between the means of the two groups\" # H₀\n",
+ "alt_hypothesis = \"The mean points of the two groups are not equal\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "# Two-sample t-test for independent samples\n",
+ "t_stat, p_value = std.ttest_ind(water_pokemon, non_water_pokemon, equal_var=True)\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.2f}\")\n",
+ "print()"
]
},
{
@@ -191,12 +661,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to Reject the Null Hypothesis: The mean points for water and non-water Pokemons are not significantly different.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean points for water and non-water Pokemons are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean points are different for water and non-water Pokemons.\")"
]
},
{
@@ -210,11 +693,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): There is no significant difference between Attack and Defense scores\n",
+ "Alternate Hypothesis (H₁): There is a significant difference between Attack and Defense scores\n",
+ "T-statistic: 4.3256\n",
+ "P-value: 0.0000171403\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"There is no significant difference between Attack and Defense scores\" # H₀\n",
+ "alt_hypothesis = \"There is a significant difference between Attack and Defense scores\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "# Two-sample t-test for dependent samples\n",
+ "t_stat, p_value = std.ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n",
+ "\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.10f}\")\n",
"\n"
]
},
@@ -227,12 +734,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the null hypothesis: There is a significant difference between Attack and Defense scores.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "if p_value > alpha:\n",
+ " print(\"Fail to reject the null hypothesis: No significant difference between Attack and Defense scores.\")\n",
+ "else:\n",
+ " print(\"Reject the null hypothesis: There is a significant difference between Attack and Defense scores.\")"
]
},
{
@@ -244,12 +762,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): The mean Special Attack and Special Defense are equal\n",
+ "Alternate Hypothesis (H₁): The mean Special Attack and Special Defense are different\n",
+ "T-statistic: 0.8540\n",
+ "P-value: 0.3933685998\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"The mean Special Attack and Special Defense are equal\" # H₀\n",
+ "alt_hypothesis = \" The mean Special Attack and Special Defense are different\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "# Two-sample t-test for dependent samples\n",
+ "t_stat, p_value = std.ttest_rel(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n",
+ "\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.10f}\")"
]
},
{
@@ -261,12 +802,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to reject the null hypothesis: No significant difference between Special Attack and Special Defense scores.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to reject the null hypothesis: No significant difference between Special Attack and Special Defense scores.\")\n",
+ "else:\n",
+ " print(\"Reject the null hypothesis: There is a significant difference between Special Attack and Special Defense scores.\")"
]
},
{
@@ -280,12 +833,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): The mean difference between Attack and Defense is zero\n",
+ "Alternate Hypothesis (H₁): The mean difference is not zero\n",
+ "T-statistic: 4.3256\n",
+ "P-value: 0.0000171403\n",
+ "Reject the null hypothesis: There is a significant difference between Attack and Defense.\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- " \n",
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"The mean difference between Attack and Defense is zero\" # H₀\n",
+ "alt_hypothesis = \"The mean difference is not zero\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "diff = pokemon[\"Attack\"] - pokemon[\"Defense\"]\n",
+ "\n",
+ "# one-sample t-test\n",
+ "t_stat, p_value = std.ttest_1samp(diff, 0)\n",
+ "\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.10f}\") \n",
+ "\n",
+ "if p_value < alpha:\n",
+ " print(\"Reject the null hypothesis: There is a significant difference between Attack and Defense.\")\n",
+ "else:\n",
+ " print(\"Fail to reject the null hypothesis: No significant difference between Attack and Defense.\")\n",
" "
]
},
@@ -302,12 +886,76 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Water Type | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 627 | \n",
+ " 108 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 61 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Water Type False True \n",
+ "Legendary \n",
+ "False 627 108\n",
+ "True 61 4"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "pokemon['Is_Water_Type'] = pokemon['Type 1'] == 'Water'\n",
+ "\n",
+ "# Create a contingency table comparing Legendary status and Water type\n",
+ "contingency_table = pd.crosstab(pokemon['Legendary'], pokemon['Is_Water_Type'], \n",
+ " rownames=['Legendary'], colnames=['Water Type'])\n",
+ "\n",
+ "contingency_table"
]
},
{
@@ -319,12 +967,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null Hypothesis (H₀): There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\n",
+ "Alternate Hypothesis (H₁): There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\n",
+ "Chi-squared statistic: 2.9429200762850503\n",
+ "p-value: 0.0862546724955095\n",
+ "Degrees of freedom: 1\n",
+ "Expected frequencies table:\n",
+ "[[632.1 102.9]\n",
+ " [ 55.9 9.1]]\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# Hypotheses\n",
+ "null_hypothesis = \"There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\" # H₀\n",
+ "alt_hypothesis = \" There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\" # H₁\n",
+ "\n",
+ "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n",
+ "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n",
+ "\n",
+ "chi2, p, dof, expected = std.chi2_contingency(contingency_table)\n",
+ "\n",
+ "print(f\"Chi-squared statistic: {chi2}\")\n",
+ "print(f\"p-value: {p}\")\n",
+ "print(f\"Degrees of freedom: {dof}\")\n",
+ "print(f\"Expected frequencies table:\\n{expected}\")"
]
},
{
@@ -336,20 +1012,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to reject the null hypothesis: There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\n"
+ ]
+ }
+ ],
"source": [
"# Your answer here:\n",
- "\n"
+ "\n",
+ "# Compare the chi-squared p-value `p` from the previous cell with alpha;\n",
+ "# `p_value` still holds the result of the earlier one-sample t-test.\n",
+ "if p > alpha:\n",
+ " print(f\"Fail to reject the null hypothesis: There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\")\n",
+ "else:\n",
+ " print(f\"We reject the null hypothesis: There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\")\n"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -368,7 +1049,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.13.2"
}
},
"nbformat": 4,