# %% Question 1 - chi-square goodness-of-fit test against a Poisson distribution
import numpy as np
from scipy.stats import chisquare, poisson

ALPHA = 0.05  # significance level used for the decision below

# Observed frequencies for the outcomes 0, 1, ..., 6 and "7 or more".
# The first category has to be k = 0: sum(k * f_obs[k]) / 448 = 1091 / 448
# ~= 2.435, which reproduces `mean` only under that labelling.
f_obs = np.array([35, 99, 104, 110, 62, 25, 10, 3])
mean = 2.435

poisson_dist = poisson(mean)

# Every category except the last one is an exact count (0..6); the last one
# is the open-ended tail "more than 6".
last_exact_count = len(f_obs) - 2

# P(X = k) for k = 0..6.  Starting the range at 1 instead of 0 would shift
# every expected frequency by one category and silently fold P(X = 0) into
# the tail, which inflates the statistic by two orders of magnitude.
poisson_pmfs = poisson_dist.pmf(np.arange(last_exact_count + 1))

# P(X >= 7) from the survival function, so the probabilities sum to 1.
with_tail = np.append(poisson_pmfs, poisson_dist.sf(last_exact_count))

# Expected frequencies under H0, scaled to the observed sample size (448).
f_exp = with_tail * f_obs.sum()

# `mean` is the sample mean (see above), i.e. one parameter was estimated
# from the data, so one extra degree of freedom is lost: dof = 8 - 1 - 1 = 6.
poisson_gof_result = chisquare(f_obs=f_obs, f_exp=f_exp, ddof=1)

# H0: the data follow a Poisson(2.435) distribution.
# The statistic is ~6.49 with p ~0.37, well above ALPHA, so H0 is NOT
# rejected: there is no evidence against the Poisson model at the 5 % level.
# (This is a goodness-of-fit test; it says nothing about independence.)
reject_poisson = poisson_gof_result.pvalue < ALPHA

poisson_gof_result
# %% Question 2 - chi-square goodness-of-fit test against a binomial distribution
import numpy as np
from scipy.stats import binom, chi2, chisquare

ALPHA = 0.05  # significance level used for both decisions below

# NOTE(review): assumes the three categories are 0, 1 and "2 or more"
# defective items, and that each of the 200 samples contains n = 10 items
# (binomial n = 10, p = 0.05 per the exercise statement) - confirm.
observed_frequencies = [138, 53, 9]
expected_proportion = 0.05
total_samples = 200
n_trials = 10

defects_dist = binom(n_trials, expected_proportion)

# Proper binomial probabilities (including the binomial coefficient) for the
# exact counts 0 and 1, plus the aggregated tail P(X >= 2).  They sum to 1,
# so no rescaling of the expected frequencies is needed afterwards.
last_exact_count = len(observed_frequencies) - 2
expected_probabilities = [
    *defects_dist.pmf(np.arange(last_exact_count + 1)),
    defects_dist.sf(last_exact_count),
]

expected_frequencies = [total_samples * prob for prob in expected_probabilities]

# p is fixed by the hypothesis (not estimated from the data), so the default
# ddof=0 applies: dof = 3 - 1 = 2.
binomial_gof_result = chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)

# H0: the number of defective items follows Binomial(10, 0.05).
# The statistic is ~8.31 with p ~0.016 < ALPHA, so H0 is rejected.
reject_binomial = binomial_gof_result.pvalue < ALPHA

binomial_gof_result

# %% Question 3 - chi-square test of independence on a 3x2 contingency table
f_obs = np.array([[32, 12], [14, 22], [6, 9]])

# Derive the marginals from the table instead of hard-coding them, so they
# cannot drift out of sync with the data.  grand_total is a scalar (95).
row_totals = f_obs.sum(axis=1)
col_totals = f_obs.sum(axis=0)
grand_total = f_obs.sum()

# Expected counts under independence: row total * column total / grand total.
f_exp = np.outer(row_totals, col_totals) / grand_total

chi2_statistic = np.sum((f_obs - f_exp) ** 2 / f_exp)

degrees_of_freedom = (f_obs.shape[0] - 1) * (f_obs.shape[1] - 1)

# sf() is the upper-tail probability; it avoids the cancellation error of
# 1 - cdf() when the p-value is small.
p_value = chi2.sf(chi2_statistic, degrees_of_freedom)

# H0: the row and column variables are independent.
# p ~0.0047 < ALPHA, so H0 IS rejected: the data show a significant
# association between the two variables.
reject_independence = p_value < ALPHA

p_value