diff --git a/your-code/__pycache__/data_check.cpython-313.pyc b/your-code/__pycache__/data_check.cpython-313.pyc new file mode 100644 index 0000000..1b2f965 Binary files /dev/null and b/your-code/__pycache__/data_check.cpython-313.pyc differ diff --git a/your-code/data_check.py b/your-code/data_check.py new file mode 100644 index 0000000..46deb24 --- /dev/null +++ b/your-code/data_check.py @@ -0,0 +1,87 @@ +'''This file performs an initial exploration of the data and outputs important details, + such as the number of rows and columns, data types, unique values, categorical columns, + and checks for null and duplicated values.''' + + +def initial_chk(data_frame): + '''This function performs an initial exploration of the data and outputs important details. + + It checks: + - The number of columns and rows + - The data types of each column + - The count of unique values in each column + - Identifies potential categorical columns (columns with < 20 unique values) + - Outputs the unique values count for categorical columns + + Parameters: + - data_frame (pd.DataFrame): The input DataFrame to explore. + + Returns: + - None: This function prints the results directly and does not return a value. 
+ ''' + + # Display the number of columns and rows + print(f"Number of columns: {data_frame.shape[1]} and rows: {data_frame.shape[0]}") + + # Show data types of each column + print("\nData types:") + print(data_frame.dtypes) + + # Display the unique values count for each column + print("\nUnique values count:") + unique_values_count = data_frame.nunique() + print(unique_values_count) + + # Identify categorical columns (those with fewer than 20 unique values) + categorical_columns = unique_values_count[unique_values_count < 20].index + print(f"\nThese columns appear to be categorical (less than 20 unique values):\n{categorical_columns}") + + # Show how often each value occurs in every categorical column + print("\nUnique value count for categorical columns:") + for col in categorical_columns: + print(f"\n{col}:") + print(data_frame[col].value_counts()) + + +def check_null(data_frame): + ''' + Check for null (missing) values in each column and print the total per column. + + Parameters: + - data_frame (pd.DataFrame): The input DataFrame to check for null values. + + Returns: + - None: This function prints the count of null values for each column and does not return a value. + ''' + + print("\nCount of null values:") + print(data_frame.isnull().sum()) + +def check_duplicated(data_frame): + ''' + Check for fully duplicated rows in the data frame and print the total. + + Parameters: + - data_frame (pd.DataFrame): The input DataFrame to check for duplicates. + + Returns: + - None: This function prints the count of duplicated rows and does not return a value. + ''' + + print("\nCount of duplicated values:") + print(data_frame.duplicated().sum()) + +def check(data_frame): + ''' + A function to call all the functions for the data exploration. + + Parameters: + - data_frame (pd.DataFrame): The input DataFrame to explore. + + Returns: + - None: This function calls other functions and does not return a value. 
+ ''' + + initial_chk(data_frame) + check_null(data_frame) + check_duplicated(data_frame) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..de1af21 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import scipy.stats as std\n", + "import numpy as np\n", + "import pandas as pd\n", + "import data_check as ch\n" ] }, { @@ -35,12 +38,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Run this code:\n", - "\n", "pokemon = pd.read_csv('../pokemon.csv')" ] }, @@ -53,12 +55,287 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns: 13 and rows: 800\n", + "\n", + "Data types:\n", + "# int64\n", + "Name object\n", + "Type 1 object\n", + "Type 2 object\n", + "Total int64\n", + "HP int64\n", + "Attack int64\n", + "Defense int64\n", + "Sp. Atk int64\n", + "Sp. Def int64\n", + "Speed int64\n", + "Generation int64\n", + "Legendary bool\n", + "dtype: object\n", + "\n", + "Unique values count:\n", + "# 721\n", + "Name 800\n", + "Type 1 18\n", + "Type 2 18\n", + "Total 200\n", + "HP 94\n", + "Attack 111\n", + "Defense 103\n", + "Sp. Atk 105\n", + "Sp. 
Def 92\n", + "Speed 108\n", + "Generation 6\n", + "Legendary 2\n", + "dtype: int64\n", + "\n", + "These columns appear to be categorical (less than 20 unique values):\n", + "Index(['Type 1', 'Type 2', 'Generation', 'Legendary'], dtype='object')\n", + "\n", + "Unique value count for categorical columns:\n", + "\n", + "Type 1:\n", + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Rock 44\n", + "Electric 44\n", + "Ground 32\n", + "Ghost 32\n", + "Dragon 32\n", + "Dark 31\n", + "Poison 28\n", + "Fighting 27\n", + "Steel 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64\n", + "\n", + "Type 2:\n", + "Type 2\n", + "Flying 97\n", + "Ground 35\n", + "Poison 34\n", + "Psychic 33\n", + "Fighting 26\n", + "Grass 25\n", + "Fairy 23\n", + "Steel 22\n", + "Dark 20\n", + "Dragon 18\n", + "Ice 14\n", + "Rock 14\n", + "Water 14\n", + "Ghost 14\n", + "Fire 12\n", + "Electric 6\n", + "Normal 4\n", + "Bug 3\n", + "Name: count, dtype: int64\n", + "\n", + "Generation:\n", + "Generation\n", + "1 166\n", + "5 165\n", + "3 160\n", + "4 121\n", + "2 106\n", + "6 82\n", + "Name: count, dtype: int64\n", + "\n", + "Legendary:\n", + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64\n", + "\n", + "Count of null values:\n", + "# 0\n", + "Name 0\n", + "Type 1 0\n", + "Type 2 386\n", + "Total 0\n", + "HP 0\n", + "Attack 0\n", + "Defense 0\n", + "Sp. Atk 0\n", + "Sp. 
Def 0\n", + "Speed 0\n", + "Generation 0\n", + "Legendary 0\n", + "dtype: int64\n", + "\n", + "Count of duplicated values:\n", + "0\n" + ] + } + ], + "source": [ + "ch.check(pokemon)" ] }, { @@ -70,12 +347,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "legendary_counts = pokemon[\"Legendary\"].value_counts()\n", + "legendary_counts\n" ] }, { @@ -87,11 +379,70 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Legendary
False417.213605106.760417
True637.38461560.937389
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Legendary \n", + "False 417.213605 106.760417\n", + "True 637.384615 60.937389" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", + "legendary_stats = pokemon.groupby(\"Legendary\")[\"Total\"].agg([\"mean\", \"std\"])\n", + "legendary_stats\n", "\n" ] }, @@ -106,12 +457,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): There is no significant difference between the means of the two groups\n", + "Alternate Hypothesis (H₁): There means of points between the two groups is not equal\n", + "Test Statistic (t): 25.83\n", + "P-Value: 0.000000000000000000000000000000000000000000000093579543359574\n", + "\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# Hypotheses\n", + "null_hypothesis = \"There is no significant difference between the means of the two groups\" # H₀\n", + "alt_hypothesis = \"There means of points between the two groups is not equal\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "legendary_t = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n", + "legendary_f = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n", + "\n", + "# Two-sample t-test for independent samples\n", + "t_stat, p_value = std.ttest_ind(legendary_t, legendary_f, equal_var=False)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.60f}\")\n", + "print()\n" ] }, { @@ -123,12 +501,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the Null Hypothesis: There is sufficient evidence to 
conclude that the mean points are different for legendary and non-legendary Pokemons.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean points for legendary and non-legendary Pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean points are different for legendary and non-legendary Pokemons.\")\n" ] }, { @@ -140,12 +534,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Rock 44\n", + "Electric 44\n", + "Ground 32\n", + "Ghost 32\n", + "Dragon 32\n", + "Dark 31\n", + "Poison 28\n", + "Fighting 27\n", + "Steel 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "type_counts = pokemon[\"Type 1\"].value_counts()\n", + "type_counts" ] }, { @@ -157,12 +583,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Water Pokémon Mean: 430.46 and Std Dev: 113.19\n", + "Non-Water Pokémon Mean: 435.86 and Std Dev: 121.09\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"]\n", + "non_water_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"]\n", + "\n", + "water_mean = water_pokemon.mean()\n", + "water_std = water_pokemon.std()\n", + "\n", + "non_water_mean = 
non_water_pokemon.mean()\n", + "non_water_std = non_water_pokemon.std()\n", + "\n", + "print(f\"Water Pokémon Mean: {water_mean:.2f} and Std Dev: {water_std:.2f}\")\n", + "print(f\"Non-Water Pokémon Mean: {non_water_mean:.2f} and Std Dev: {non_water_std:.2f}\")\n" ] }, { @@ -174,12 +620,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): There is no significant difference between the means of the two groups\n", + "Alternate Hypothesis (H₁): The mean points of the two groups are not equal\n", + "Test Statistic (t): -0.44\n", + "P-Value: 0.66\n", + "\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# Hypotheses\n", + "null_hypothesis = \"There is no significant difference between the means of the two groups\" # H₀\n", + "alt_hypothesis = \"The mean points of the two groups are not equal\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "# Two-sample t-test for independent samples\n", + "t_stat, p_value = std.ttest_ind(water_pokemon, non_water_pokemon, equal_var=True)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.2f}\")\n", + "print()" ] }, { @@ -191,12 +661,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis: The mean points for water and non-water Pokemons are not significantly different.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean points for water and non-water Pokemons are not significantly different.\")\n", 
+ "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean points are different for water and non-water Pokemons.\")" ] }, { @@ -210,11 +693,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): There is no significant difference between Attack and Defense scores\n", + "Alternate Hypothesis (H₁): There is a significant difference between Attack and Defense scores\n", + "T-statistic: 4.3256\n", + "P-value: 0.0000171403\n" + ] + } + ], "source": [ "# Your code here:\n", + "\n", + "# Hypotheses\n", + "null_hypothesis = \"There is no significant difference between Attack and Defense scores\" # H₀\n", + "alt_hypothesis = \"There is a significant difference between Attack and Defense scores\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "# Two-sample t-test for dependent samples\n", + "t_stat, p_value = std.ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n", + "\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.10f}\")\n", "\n" ] }, @@ -227,12 +734,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the null hypothesis: There is a significant difference between Attack and Defense scores.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "if p_value > alpha:\n", + " print(\"Fail to reject the null hypothesis: No significant difference between Attack and Defense scores.\")\n", + "else:\n", + " print(\"Reject the null hypothesis: There is a significant difference between Attack and Defense scores.\")" ] }, { @@ -244,12 +762,35 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): The mean Special Attack and Special Defense are equal\n", + "Alternate Hypothesis (H₁): The mean Special Attack and Special Defense are different\n", + "T-statistic: 0.8540\n", + "P-value: 0.3933685998\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# Hypotheses\n", + "null_hypothesis = \"The mean Special Attack and Special Defense are equal\" # H₀\n", + "alt_hypothesis = \" The mean Special Attack and Special Defense are different\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "# Two-sample t-test for dependent samples\n", + "t_stat, p_value = std.ttest_rel(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n", + "\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.10f}\")" ] }, { @@ -261,12 +802,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to reject the null hypothesis: No significant difference between Special Attack and Special Defense scores.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "if p_value > alpha:\n", + " print(\"Fail to reject the null hypothesis: No significant difference between Special Attack and Special Defense scores.\")\n", + "else:\n", + " print(\"Reject the null hypothesis: There is a significant difference between Special Attack and Special Defense scores.\")" ] }, { @@ -280,12 +833,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): The mean difference between 
Attack and Defense is zero\n", + "Alternate Hypothesis (H₁): The mean difference is not zero\n", + "T-statistic: 4.3256\n", + "P-value: 0.0000171403\n", + "Reject the null hypothesis: There is a significant difference between Attack and Defense.\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", + "\n", + "# Hypotheses\n", + "null_hypothesis = \"The mean difference between Attack and Defense is zero\" # H₀\n", + "alt_hypothesis = \"The mean difference is not zero\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "diff = pokemon[\"Attack\"] - pokemon[\"Defense\"]\n", + "\n", + "# one-sample t-test\n", + "t_stat, p_value = std.ttest_1samp(diff, 0)\n", + "\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.10f}\") \n", + "\n", + "if p_value < alpha:\n", + " print(\"Reject the null hypothesis: There is a significant difference between Attack and Defense.\")\n", + "else:\n", + " print(\"Fail to reject the null hypothesis: No significant difference between Attack and Defense.\")\n", " " ] }, @@ -302,12 +886,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Water TypeFalseTrue
Legendary
False627108
True614
\n", + "
" + ], + "text/plain": [ + "Water Type False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon['Is_Water_Type'] = pokemon['Type 1'] == 'Water'\n", + "\n", + "# Create a contingency table comparing Legendary status and Water type\n", + "contingency_table = pd.crosstab(pokemon['Legendary'], pokemon['Is_Water_Type'], \n", + " rownames=['Legendary'], colnames=['Water Type'])\n", + "\n", + "contingency_table" ] }, { @@ -319,12 +967,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Null Hypothesis (H₀): There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\n", + "Alternate Hypothesis (H₁): There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\n", + "Chi-squared statistic: 2.9429200762850503\n", + "p-value: 0.0862546724955095\n", + "Degrees of freedom: 1\n", + "Expected frequencies table:\n", + "[[632.1 102.9]\n", + " [ 55.9 9.1]]\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# Hypotheses\n", + "null_hypothesis = \"There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\" # H₀\n", + "alt_hypothesis = \" There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not\" # H₁\n", + "\n", + "print(f\"Null Hypothesis (H₀): {null_hypothesis}\")\n", + "print(f\"Alternate Hypothesis (H₁): {alt_hypothesis}\")\n", + "\n", + "chi2, p, dof, expected = std.chi2_contingency(contingency_table)\n", + "\n", + "print(f\"Chi-squared statistic: {chi2}\")\n", + "print(f\"p-value: {p}\")\n", + "print(f\"Degrees of freedom: {dof}\")\n", + "print(f\"Expected 
frequencies table:\\n{expected}\")" ] }, { @@ -336,20 +1012,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to reject the null hypothesis: There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\n" + ] + } + ], "source": [ "# Your answer here:\n", - "\n" + "# Compare the chi-squared p-value (p) with alpha; p_value still holds the earlier t-test result.\n", + "if p > alpha:\n", + " print(f\"Fail to reject the null hypothesis: There is no association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\")\n", + "else:\n", + " print(f\"We reject the null hypothesis: There is an association between whether a Pokémon is legendary or not and whether its Type 1 is water or not.\")\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -368,7 +1049,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.13.2" } }, "nbformat": 4,