diff --git a/ai-ml/Nutritional_Facts_WebScrap.ipynb b/ai-ml/Nutritional_Facts_WebScrap.ipynb new file mode 100644 index 00000000..12e6060e --- /dev/null +++ b/ai-ml/Nutritional_Facts_WebScrap.ipynb @@ -0,0 +1,1858 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FRUITS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the URL\n", + "url = \"https://www.nutritiontable.com/nutritions/?c=11\"\n", + "\n", + "# Send a GET request to the URL\n", + "response = requests.get(url)\n", + "\n", + "# Check if the request was successful\n", + "if response.status_code == 200:\n", + " # Parse the HTML content\n", + " soup = BeautifulSoup(response.content, \"html.parser\")\n", + " \n", + " div_element_units = soup.find(\"div\", {\"class\": \"vwtContainer widthContainer\", \"id\": \"pnlVwtContainer\"})\n", + "\n", + " if div_element_units:\n", + " # Find all <a> tags within this div\n", + " a_tags = div_element_units.find_all(\"a\")\n", + "\n", + " # Extract the href attribute from each <a> tag\n", + " href_lists = []\n", + " \n", + " for a in a_tags:\n", + " href_list = a.get(\"href\")\n", + " # Append the href string\n", + " href_lists.append(href_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "url_list = 'https://www.nutritiontable.com/nutritions/nutrient/?id=69'\n", + "\n", + "\n", + "# Send a GET request to the URL\n", + "response = requests.get(url_list)\n", + "\n", + "# Check if the request was successful\n", + "if response.status_code == 200:\n", + " # Parse the HTML content\n", + " soup = BeautifulSoup(response.content, \"html.parser\")\n", + " \n", + " fieldset_div = soup.find(\"div\", {\"id\":
\"ctl00_cphMain_pnlFieldsetVW\"})\n", + "\n", + " if fieldset_div:\n", + " # Find all rowitem <div> tags within this div\n", + " rowitem_divs = fieldset_div.find_all(\"div\", {\"class\": \"rowitem\"})\n", + "\n", + " # Take the first '|'-separated text segment of each row as a column name\n", + " row_header = [\"Name\",]\n", + " for row in rowitem_divs:\n", + " text_name = row.get_text(separator='|', strip=True).split('|')[0]\n", + " row_header.append(text_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(columns=row_header)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "url_list = 'https://www.nutritiontable.com'\n", + "\n", + "url = [f'{url_list}{link}' for link in href_lists]\n", + "\n", + "for link in url:\n", + " # Send a GET request to the URL\n", + " response = requests.get(link)\n", + "\n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " # Parse the HTML content\n", + " soup = BeautifulSoup(response.content, \"html.parser\")\n", + " \n", + " field_div = soup.find(\"div\", {\"id\": \"ctl00_cphMain_pnlShowProd\", \"class\": \"col-7 col-m-7 col-t-15\"})\n", + " if field_div:\n", + " # Find the <h2> tag within this div and read its text\n", + " h2_tag = field_div.find(\"h2\")\n", + " fruit = h2_tag.text\n", + " \n", + " \n", + " fieldset_div = soup.find(\"div\", {\"id\": \"ctl00_cphMain_pnlFieldsetVW\"})\n", + "\n", + " if fieldset_div:\n", + " # Find all rowitem <div> tags within this div\n", + " rowitem_divs = fieldset_div.find_all(\"div\", {\"class\": \"rowitem\"})\n", + "\n", + " # Take the second '|'-separated text segment of each row as its value\n", + " row = []\n", + " row_value = []\n", + " for row in rowitem_divs:\n", + " text_value = row.get_text(separator='|', strip=True).split('|')[1]\n", + " row_value.append(text_value)\n", + " \n", + " row = [fruit] + row_value\n", + " \n", + " length = len(df)\n", + " df.loc[length] =
row" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameEmotional value:Health value:Product group:Energy (kcal)Energy (kJoule)WaterProteinFatSaturated fat...Potassium (K)Calcium (Ca)Phosphorus (P)Iron (Fe)Magnesium (Mg)Copper (Cu)Zinc (Zn)Selenium (Se)Iodine (I)Manganese (Mn)
0Apple, fresh8.4-Fruit5422984,30,40,00,0...1204,010,00,25,000,100,04---
1Applesauce, canned, sweetened7.2-Fruit8134178,50,20,10,0...1144,07,00,13,000,010,02---
2Apricot, canned, no skin5.9-Fruit7431080,30,50,10,0...19611,015,00,75,000,050,10---
3Apricot, dried6.2-Fruit274114526,95,00,10,0...140080,0110,04,460,000,800,40---
4Apricot, fresh7.4-Fruit6025283,41,40,40,0...28016,020,00,010,000,000,00---
5Bananas, fresh8.0-Fruit8636376,81,20,20,0...4008,728,00,630,000,130,22---
6Black currant7.3-Fruit4720085,61,20,20,0...19045,030,00,915,000,150,30---
7Blackberries, raw7.8-Fruit5824383,11,21,00,1...19045,030,00,930,000,700,00---
8Blueberries6.7-Fruit7531677,10,60,20,0...6012,016,02,55,000,070,20---
9Cherries, red, sour, fresh5.8-Fruit5222084,91,00,00,0...19010,020,00,410,000,060,10---
10Cherries, red, sweet, fresh8.0-Fruit5422884,90,50,10,0...24010,030,00,410,000,100,10---
11Cranberries, dried, sweetened5.6-Fruit35314759,11,20,00,0...----------
12Cranberries, fresh6.0-Fruit2410491,00,50,00,0...8010,010,00,55,000,060,12---
13Currants, red, fresh6.8-Fruit3816385,11,00,00,0...30020,040,01,010,000,100,15---
14Date, candied5.0-Fruit286119524,21,80,00,0...65046,057,01,950,000,300,30---
15Date, fresh6.3-Fruit15263858,82,20,00,0...40025,030,00,320,000,100,20---
16Figs, dried, uncooked5.7-Fruit270112826,73,51,30,0...850190,0110,03,375,000,400,90---
17Figs, fresh5.9-Fruit6426982,31,40,20,0...24055,030,00,617,000,070,25---
18Gooseberries, fresh7.0-Fruit3615390,01,00,00,0...20030,030,01,010,000,050,10---
19Grapefruit, fresh7.4-Fruit4519187,20,60,20,0...18020,017,00,310,000,040,17---
20Grapes, blue, fresh7.6-Fruit5322485,20,90,10,0...19020,015,00,412,000,100,08---
21Grapes, green/white, fresh7.9-Fruit5724284,50,50,00,0...18010,016,00,55,000,100,10---
22Guava, fresh5.2-Fruit4619585,80,90,50,0...23010,025,00,410,000,100,20---
23Khaki-fruit, fresh4.2-Fruit7832779,50,50,00,0...18010,020,00,410,000,100,10---
24Kiwi, fresh8.4-Fruit4820586,81,00,20,0...30040,030,00,810,000,120,10---
25Kumkwat4.7-Fruit7130180,51,00,00,0...18060,020,00,710,000,110,10---
26Lemon, pealed, fresh6.1-Fruit3615189,40,50,40,0...15010,020,00,110,000,050,10---
27Limes, fresh6.0-Fruit3213593,00,52,5-...8015,011,00,2------
28Lychee, fresh6.8-Fruit8134278,60,90,30,0...18010,035,00,420,000,150,18---
29Mangos, fresh7.4-Fruit6125983,31,10,30,0...19012,012,00,210,000,100,07---
30Marmalade, orange--Fruit292122027,20,80,5-...4030,015,00,55,00-----
31Melons, cantaloupe, fresh7.5-Fruit4719784,30,90,30,0...20015,010,00,810,000,030,20---
32Nectarines, fresh7.8-Fruit3615489,21,00,00,0...17510,050,00,410,00-0,10---
33Oranges, fresh8.5-Fruit4418685,21,00,20,0...17040,025,00,410,000,060,10---
34Papayas, fresh6.3-Fruit4519187,20,60,10,0...15020,016,00,310,000,080,30---
35Passion fruit, purple, fresh7.1-Fruit12050466,22,30,70,0...35015,060,01,630,000,100,80---
36Peaches, fresh8.0-Fruit3916287,50,80,10,0...2057,023,00,28,000,060,10---
37Pears, canned, heavy syrup5.7-Fruit7632279,10,30,10,0...6510,08,00,45,000,040,06---
38Pears, fresh8.4-Fruit5422984,40,50,30,0...657,08,00,45,000,040,07---
39Pineapple, fresh8.4-Fruit6125683,40,50,20,0...17016,09,00,420,000,100,10---
40PlantaIns, fresh6.1-Fruit13757564,41,00,20,0...3505,030,01,030,00-0,15---
41Plums, dried, uncooked5.8-Fruit22192736,02,00,00,0...80030,060,01,030,000,150,50---
42Plums, fresh6.4-Fruit6025583,60,80,6-...20010,020,00,210,000,300,10---
43Plums, mixed varieties, fresh7.5-Fruit5623584,50,60,20,0...22015,020,00,510,000,060,08---
44Pomegranate, fresh6.0-Fruit8536077,31,00,70,2...20010,010,00,310,000,200,40---
45Quinces, fresh5.3-Fruit6929180,90,50,10,0...-10,0-0,7------
46Raisins, mixed varieties6.9-Fruit329137513,02,00,00,0...65050,090,02,040,000,800,20---
47Raisins, seedles6.7-Fruit291121624,52,50,10,0...80030,0110,00,340,000,100,10---
48Raspberries, fresh7.3-Fruit6828676,01,20,50,1...17040,040,01,025,000,150,00---
49Red berries, fresh6.8-Fruit5121382,91,10,10,0...23829,027,01,012,000,100,20---
50Rose hip, fresh6.3-Fruit9539550,53,50,1-...290250,0260,00,5-1,501,00---
51Strawberries, fresh9.0-Fruit3615489,30,80,40,0...15025,030,00,915,000,120,12---
52Tangerines, mandarin oranges, fresh8.3-Fruit4920986,00,70,30,0...21033,020,00,07,000,000,12---
53Watermelon, fresh7.7-Fruit3715989,30,60,20,0...16010,010,00,410,000,000,10---
\n", + "

54 rows × 40 columns

\n", + "
" + ], + "text/plain": [ + " Name Emotional value: Health value: \\\n", + "0 Apple, fresh 8.4 - \n", + "1 Applesauce, canned, sweetened 7.2 - \n", + "2 Apricot, canned, no skin 5.9 - \n", + "3 Apricot, dried 6.2 - \n", + "4 Apricot, fresh 7.4 - \n", + "5 Bananas, fresh 8.0 - \n", + "6 Black currant 7.3 - \n", + "7 Blackberries, raw 7.8 - \n", + "8 Blueberries 6.7 - \n", + "9 Cherries, red, sour, fresh 5.8 - \n", + "10 Cherries, red, sweet, fresh 8.0 - \n", + "11 Cranberries, dried, sweetened 5.6 - \n", + "12 Cranberries, fresh 6.0 - \n", + "13 Currants, red, fresh 6.8 - \n", + "14 Date, candied 5.0 - \n", + "15 Date, fresh 6.3 - \n", + "16 Figs, dried, uncooked 5.7 - \n", + "17 Figs, fresh 5.9 - \n", + "18 Gooseberries, fresh 7.0 - \n", + "19 Grapefruit, fresh 7.4 - \n", + "20 Grapes, blue, fresh 7.6 - \n", + "21 Grapes, green/white, fresh 7.9 - \n", + "22 Guava, fresh 5.2 - \n", + "23 Khaki-fruit, fresh 4.2 - \n", + "24 Kiwi, fresh 8.4 - \n", + "25 Kumkwat 4.7 - \n", + "26 Lemon, pealed, fresh 6.1 - \n", + "27 Limes, fresh 6.0 - \n", + "28 Lychee, fresh 6.8 - \n", + "29 Mangos, fresh 7.4 - \n", + "30 Marmalade, orange - - \n", + "31 Melons, cantaloupe, fresh 7.5 - \n", + "32 Nectarines, fresh 7.8 - \n", + "33 Oranges, fresh 8.5 - \n", + "34 Papayas, fresh 6.3 - \n", + "35 Passion fruit, purple, fresh 7.1 - \n", + "36 Peaches, fresh 8.0 - \n", + "37 Pears, canned, heavy syrup 5.7 - \n", + "38 Pears, fresh 8.4 - \n", + "39 Pineapple, fresh 8.4 - \n", + "40 PlantaIns, fresh 6.1 - \n", + "41 Plums, dried, uncooked 5.8 - \n", + "42 Plums, fresh 6.4 - \n", + "43 Plums, mixed varieties, fresh 7.5 - \n", + "44 Pomegranate, fresh 6.0 - \n", + "45 Quinces, fresh 5.3 - \n", + "46 Raisins, mixed varieties 6.9 - \n", + "47 Raisins, seedles 6.7 - \n", + "48 Raspberries, fresh 7.3 - \n", + "49 Red berries, fresh 6.8 - \n", + "50 Rose hip, fresh 6.3 - \n", + "51 Strawberries, fresh 9.0 - \n", + "52 Tangerines, mandarin oranges, fresh 8.3 - \n", + "53 Watermelon, fresh 7.7 - \n", 
+ "\n", + " Product group: Energy (kcal) Energy (kJoule) Water Protein Fat \\\n", + "0 Fruit 54 229 84,3 0,4 0,0 \n", + "1 Fruit 81 341 78,5 0,2 0,1 \n", + "2 Fruit 74 310 80,3 0,5 0,1 \n", + "3 Fruit 274 1145 26,9 5,0 0,1 \n", + "4 Fruit 60 252 83,4 1,4 0,4 \n", + "5 Fruit 86 363 76,8 1,2 0,2 \n", + "6 Fruit 47 200 85,6 1,2 0,2 \n", + "7 Fruit 58 243 83,1 1,2 1,0 \n", + "8 Fruit 75 316 77,1 0,6 0,2 \n", + "9 Fruit 52 220 84,9 1,0 0,0 \n", + "10 Fruit 54 228 84,9 0,5 0,1 \n", + "11 Fruit 353 1475 9,1 1,2 0,0 \n", + "12 Fruit 24 104 91,0 0,5 0,0 \n", + "13 Fruit 38 163 85,1 1,0 0,0 \n", + "14 Fruit 286 1195 24,2 1,8 0,0 \n", + "15 Fruit 152 638 58,8 2,2 0,0 \n", + "16 Fruit 270 1128 26,7 3,5 1,3 \n", + "17 Fruit 64 269 82,3 1,4 0,2 \n", + "18 Fruit 36 153 90,0 1,0 0,0 \n", + "19 Fruit 45 191 87,2 0,6 0,2 \n", + "20 Fruit 53 224 85,2 0,9 0,1 \n", + "21 Fruit 57 242 84,5 0,5 0,0 \n", + "22 Fruit 46 195 85,8 0,9 0,5 \n", + "23 Fruit 78 327 79,5 0,5 0,0 \n", + "24 Fruit 48 205 86,8 1,0 0,2 \n", + "25 Fruit 71 301 80,5 1,0 0,0 \n", + "26 Fruit 36 151 89,4 0,5 0,4 \n", + "27 Fruit 32 135 93,0 0,5 2,5 \n", + "28 Fruit 81 342 78,6 0,9 0,3 \n", + "29 Fruit 61 259 83,3 1,1 0,3 \n", + "30 Fruit 292 1220 27,2 0,8 0,5 \n", + "31 Fruit 47 197 84,3 0,9 0,3 \n", + "32 Fruit 36 154 89,2 1,0 0,0 \n", + "33 Fruit 44 186 85,2 1,0 0,2 \n", + "34 Fruit 45 191 87,2 0,6 0,1 \n", + "35 Fruit 120 504 66,2 2,3 0,7 \n", + "36 Fruit 39 162 87,5 0,8 0,1 \n", + "37 Fruit 76 322 79,1 0,3 0,1 \n", + "38 Fruit 54 229 84,4 0,5 0,3 \n", + "39 Fruit 61 256 83,4 0,5 0,2 \n", + "40 Fruit 137 575 64,4 1,0 0,2 \n", + "41 Fruit 221 927 36,0 2,0 0,0 \n", + "42 Fruit 60 255 83,6 0,8 0,6 \n", + "43 Fruit 56 235 84,5 0,6 0,2 \n", + "44 Fruit 85 360 77,3 1,0 0,7 \n", + "45 Fruit 69 291 80,9 0,5 0,1 \n", + "46 Fruit 329 1375 13,0 2,0 0,0 \n", + "47 Fruit 291 1216 24,5 2,5 0,1 \n", + "48 Fruit 68 286 76,0 1,2 0,5 \n", + "49 Fruit 51 213 82,9 1,1 0,1 \n", + "50 Fruit 95 395 50,5 3,5 0,1 \n", + "51 Fruit 36 154 89,3 
0,8 0,4 \n", + "52 Fruit 49 209 86,0 0,7 0,3 \n", + "53 Fruit 37 159 89,3 0,6 0,2 \n", + "\n", + " Saturated fat ... Potassium (K) Calcium (Ca) Phosphorus (P) Iron (Fe) \\\n", + "0 0,0 ... 120 4,0 10,0 0,2 \n", + "1 0,0 ... 114 4,0 7,0 0,1 \n", + "2 0,0 ... 196 11,0 15,0 0,7 \n", + "3 0,0 ... 1400 80,0 110,0 4,4 \n", + "4 0,0 ... 280 16,0 20,0 0,0 \n", + "5 0,0 ... 400 8,7 28,0 0,6 \n", + "6 0,0 ... 190 45,0 30,0 0,9 \n", + "7 0,1 ... 190 45,0 30,0 0,9 \n", + "8 0,0 ... 60 12,0 16,0 2,5 \n", + "9 0,0 ... 190 10,0 20,0 0,4 \n", + "10 0,0 ... 240 10,0 30,0 0,4 \n", + "11 0,0 ... - - - - \n", + "12 0,0 ... 80 10,0 10,0 0,5 \n", + "13 0,0 ... 300 20,0 40,0 1,0 \n", + "14 0,0 ... 650 46,0 57,0 1,9 \n", + "15 0,0 ... 400 25,0 30,0 0,3 \n", + "16 0,0 ... 850 190,0 110,0 3,3 \n", + "17 0,0 ... 240 55,0 30,0 0,6 \n", + "18 0,0 ... 200 30,0 30,0 1,0 \n", + "19 0,0 ... 180 20,0 17,0 0,3 \n", + "20 0,0 ... 190 20,0 15,0 0,4 \n", + "21 0,0 ... 180 10,0 16,0 0,5 \n", + "22 0,0 ... 230 10,0 25,0 0,4 \n", + "23 0,0 ... 180 10,0 20,0 0,4 \n", + "24 0,0 ... 300 40,0 30,0 0,8 \n", + "25 0,0 ... 180 60,0 20,0 0,7 \n", + "26 0,0 ... 150 10,0 20,0 0,1 \n", + "27 - ... 80 15,0 11,0 0,2 \n", + "28 0,0 ... 180 10,0 35,0 0,4 \n", + "29 0,0 ... 190 12,0 12,0 0,2 \n", + "30 - ... 40 30,0 15,0 0,5 \n", + "31 0,0 ... 200 15,0 10,0 0,8 \n", + "32 0,0 ... 175 10,0 50,0 0,4 \n", + "33 0,0 ... 170 40,0 25,0 0,4 \n", + "34 0,0 ... 150 20,0 16,0 0,3 \n", + "35 0,0 ... 350 15,0 60,0 1,6 \n", + "36 0,0 ... 205 7,0 23,0 0,2 \n", + "37 0,0 ... 65 10,0 8,0 0,4 \n", + "38 0,0 ... 65 7,0 8,0 0,4 \n", + "39 0,0 ... 170 16,0 9,0 0,4 \n", + "40 0,0 ... 350 5,0 30,0 1,0 \n", + "41 0,0 ... 800 30,0 60,0 1,0 \n", + "42 - ... 200 10,0 20,0 0,2 \n", + "43 0,0 ... 220 15,0 20,0 0,5 \n", + "44 0,2 ... 200 10,0 10,0 0,3 \n", + "45 0,0 ... - 10,0 - 0,7 \n", + "46 0,0 ... 650 50,0 90,0 2,0 \n", + "47 0,0 ... 800 30,0 110,0 0,3 \n", + "48 0,1 ... 170 40,0 40,0 1,0 \n", + "49 0,0 ... 238 29,0 27,0 1,0 \n", + "50 - ... 
290 250,0 260,0 0,5 \n", + "51 0,0 ... 150 25,0 30,0 0,9 \n", + "52 0,0 ... 210 33,0 20,0 0,0 \n", + "53 0,0 ... 160 10,0 10,0 0,4 \n", + "\n", + " Magnesium (Mg) Copper (Cu) Zinc (Zn) Selenium (Se) Iodine (I) \\\n", + "0 5,00 0,10 0,04 - - \n", + "1 3,00 0,01 0,02 - - \n", + "2 5,00 0,05 0,10 - - \n", + "3 60,00 0,80 0,40 - - \n", + "4 10,00 0,00 0,00 - - \n", + "5 30,00 0,13 0,22 - - \n", + "6 15,00 0,15 0,30 - - \n", + "7 30,00 0,70 0,00 - - \n", + "8 5,00 0,07 0,20 - - \n", + "9 10,00 0,06 0,10 - - \n", + "10 10,00 0,10 0,10 - - \n", + "11 - - - - - \n", + "12 5,00 0,06 0,12 - - \n", + "13 10,00 0,10 0,15 - - \n", + "14 50,00 0,30 0,30 - - \n", + "15 20,00 0,10 0,20 - - \n", + "16 75,00 0,40 0,90 - - \n", + "17 17,00 0,07 0,25 - - \n", + "18 10,00 0,05 0,10 - - \n", + "19 10,00 0,04 0,17 - - \n", + "20 12,00 0,10 0,08 - - \n", + "21 5,00 0,10 0,10 - - \n", + "22 10,00 0,10 0,20 - - \n", + "23 10,00 0,10 0,10 - - \n", + "24 10,00 0,12 0,10 - - \n", + "25 10,00 0,11 0,10 - - \n", + "26 10,00 0,05 0,10 - - \n", + "27 - - - - - \n", + "28 20,00 0,15 0,18 - - \n", + "29 10,00 0,10 0,07 - - \n", + "30 5,00 - - - - \n", + "31 10,00 0,03 0,20 - - \n", + "32 10,00 - 0,10 - - \n", + "33 10,00 0,06 0,10 - - \n", + "34 10,00 0,08 0,30 - - \n", + "35 30,00 0,10 0,80 - - \n", + "36 8,00 0,06 0,10 - - \n", + "37 5,00 0,04 0,06 - - \n", + "38 5,00 0,04 0,07 - - \n", + "39 20,00 0,10 0,10 - - \n", + "40 30,00 - 0,15 - - \n", + "41 30,00 0,15 0,50 - - \n", + "42 10,00 0,30 0,10 - - \n", + "43 10,00 0,06 0,08 - - \n", + "44 10,00 0,20 0,40 - - \n", + "45 - - - - - \n", + "46 40,00 0,80 0,20 - - \n", + "47 40,00 0,10 0,10 - - \n", + "48 25,00 0,15 0,00 - - \n", + "49 12,00 0,10 0,20 - - \n", + "50 - 1,50 1,00 - - \n", + "51 15,00 0,12 0,12 - - \n", + "52 7,00 0,00 0,12 - - \n", + "53 10,00 0,00 0,10 - - \n", + "\n", + " Manganese (Mn) \n", + "0 - \n", + "1 - \n", + "2 - \n", + "3 - \n", + "4 - \n", + "5 - \n", + "6 - \n", + "7 - \n", + "8 - \n", + "9 - \n", + "10 - \n", + "11 - 
\n", + "12 - \n", + "13 - \n", + "14 - \n", + "15 - \n", + "16 - \n", + "17 - \n", + "18 - \n", + "19 - \n", + "20 - \n", + "21 - \n", + "22 - \n", + "23 - \n", + "24 - \n", + "25 - \n", + "26 - \n", + "27 - \n", + "28 - \n", + "29 - \n", + "30 - \n", + "31 - \n", + "32 - \n", + "33 - \n", + "34 - \n", + "35 - \n", + "36 - \n", + "37 - \n", + "38 - \n", + "39 - \n", + "40 - \n", + "41 - \n", + "42 - \n", + "43 - \n", + "44 - \n", + "45 - \n", + "46 - \n", + "47 - \n", + "48 - \n", + "49 - \n", + "50 - \n", + "51 - \n", + "52 - \n", + "53 - \n", + "\n", + "[54 rows x 40 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vegetables" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the URL\n", + "url = \"https://www.nutritiontable.com/nutritions/?c=10\"\n", + "\n", + "# Send a GET request to the URL\n", + "response = requests.get(url)\n", + "\n", + "# Check if the request was successful\n", + "if response.status_code == 200:\n", + " # Parse the HTML content\n", + " soup = BeautifulSoup(response.content, \"html.parser\")\n", + " \n", + " # Find the
<div> with class \"headerLetters\" and id \"ctl00_pnlHeaderLetters\"\n", + " div_element = soup.find(\"div\", {\"class\": \"headerLetters\", \"id\": \"ctl00_pnlHeaderLetters\"})\n", + " \n", + " if div_element:\n", + " # Find all <a> tags within this div\n", + " a_tags = div_element.find_all(\"a\")\n", + " \n", + " # Extract href attributes from the <a> tags\n", + " href_list = [a.get(\"href\") for a in a_tags]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "div_element_units = soup.find(\"div\", {\"class\": \"vwtContainer widthContainer\", \"id\": \"pnlVwtContainer\"})\n", + "\n", + "if div_element_units:\n", + " # Find all <a> tags within this div\n", + " a_tags = div_element_units.find_all(\"a\")\n", + "\n", + " # Extract href attributes and corresponding text from the <a> tags\n", + " lists = []\n", + " for a in a_tags:\n", + " href_list = a.get(\"href\")\n", + " href_text = a.text\n", + " lists.append((href_list, href_text)) # Append as a tuple\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ai-ml/data_preprocessing_fruits_nutritional_facts.ipynb b/ai-ml/data_preprocessing_fruits_nutritional_facts.ipynb new file mode 100644 index 00000000..90fb8a42 --- /dev/null +++ b/ai-ml/data_preprocessing_fruits_nutritional_facts.ipynb @@ -0,0 +1,606 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing necessary libraries\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { +
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameEmotional value:Health value:Product group:Energy (kcal)Energy (kJoule)WaterProteinFatSaturated fat...Potassium (K)Calcium (Ca)Phosphorus (P)Iron (Fe)Magnesium (Mg)Copper (Cu)Zinc (Zn)Selenium (Se)Iodine (I)Manganese (Mn)
0Apple, fresh8.4-Fruit5422984,30,40,00,0...1204,010,00,25,000,100,04---
1Applesauce, canned, sweetened7.2-Fruit8134178,50,20,10,0...1144,07,00,13,000,010,02---
2Apricot, canned, no skin5.9-Fruit7431080,30,50,10,0...19611,015,00,75,000,050,10---
3Apricot, dried6.2-Fruit274114526,95,00,10,0...140080,0110,04,460,000,800,40---
4Apricot, fresh7.4-Fruit6025283,41,40,40,0...28016,020,00,010,000,000,00---
\n", + "

5 rows × 40 columns

\n", + "
" + ], + "text/plain": [ + " Name Emotional value: Health value: \\\n", + "0 Apple, fresh 8.4 - \n", + "1 Applesauce, canned, sweetened 7.2 - \n", + "2 Apricot, canned, no skin 5.9 - \n", + "3 Apricot, dried 6.2 - \n", + "4 Apricot, fresh 7.4 - \n", + "\n", + " Product group: Energy (kcal) Energy (kJoule) Water Protein Fat \\\n", + "0 Fruit 54 229 84,3 0,4 0,0 \n", + "1 Fruit 81 341 78,5 0,2 0,1 \n", + "2 Fruit 74 310 80,3 0,5 0,1 \n", + "3 Fruit 274 1145 26,9 5,0 0,1 \n", + "4 Fruit 60 252 83,4 1,4 0,4 \n", + "\n", + " Saturated fat ... Potassium (K) Calcium (Ca) Phosphorus (P) Iron (Fe) \\\n", + "0 0,0 ... 120 4,0 10,0 0,2 \n", + "1 0,0 ... 114 4,0 7,0 0,1 \n", + "2 0,0 ... 196 11,0 15,0 0,7 \n", + "3 0,0 ... 1400 80,0 110,0 4,4 \n", + "4 0,0 ... 280 16,0 20,0 0,0 \n", + "\n", + " Magnesium (Mg) Copper (Cu) Zinc (Zn) Selenium (Se) Iodine (I) Manganese (Mn) \n", + "0 5,00 0,10 0,04 - - - \n", + "1 3,00 0,01 0,02 - - - \n", + "2 5,00 0,05 0,10 - - - \n", + "3 60,00 0,80 0,40 - - - \n", + "4 10,00 0,00 0,00 - - - \n", + "\n", + "[5 rows x 40 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Read the CSV file into a DataFrame\n", + "fruits = pd.read_csv('friuts_nutritional_facts.csv')\n", + "fruits.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 54 entries, 0 to 53\n", + "Data columns (total 40 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Name 54 non-null object\n", + " 1 Emotional value: 54 non-null object\n", + " 2 Health value: 54 non-null object\n", + " 3 Product group: 54 non-null object\n", + " 4 Energy (kcal) 54 non-null int64 \n", + " 5 Energy (kJoule) 54 non-null int64 \n", + " 6 Water 54 non-null object\n", + " 7 Protein 54 non-null object\n", + " 8 Fat 54 non-null object\n", + " 9 Saturated fat 
54 non-null object\n", + " 10 Monounsaturated fat 54 non-null object\n", + " 11 Polyunsaturated fat 54 non-null object\n", + " 12 Trans fat 54 non-null object\n", + " 13 Cholesterol 54 non-null object\n", + " 14 Carbohydrates 54 non-null object\n", + " 15 Sugars 54 non-null object\n", + " 16 Polysaccharides 54 non-null object\n", + " 17 Dietary fiber 54 non-null object\n", + " 18 Vitamin A (Retinol) 54 non-null object\n", + " 19 Vitamin B1 (Thiamine) 54 non-null object\n", + " 20 Vitamin B (Repulfvin) 54 non-null object\n", + " 21 Vitamin B3 (Niacin) 54 non-null object\n", + " 22 Vitamin B6 (Pyridoxine) 54 non-null object\n", + " 23 Vitamin B11 (Folic acid) 54 non-null object\n", + " 24 Vitamin B12 (Cobalamin) 54 non-null object\n", + " 25 Vitamin C (Ascorbic Acid) 54 non-null object\n", + " 26 Vitamin D (Calcifine) 54 non-null object\n", + " 27 Vitamin E (Tocopherol) 54 non-null object\n", + " 28 Vitamin K (Phyloquinone) 54 non-null object\n", + " 29 Sodium (Na) 54 non-null object\n", + " 30 Potassium (K) 54 non-null object\n", + " 31 Calcium (Ca) 54 non-null object\n", + " 32 Phosphorus (P) 54 non-null object\n", + " 33 Iron (Fe) 54 non-null object\n", + " 34 Magnesium (Mg) 54 non-null object\n", + " 35 Copper (Cu) 54 non-null object\n", + " 36 Zinc (Zn) 54 non-null object\n", + " 37 Selenium (Se) 54 non-null object\n", + " 38 Iodine (I) 54 non-null object\n", + " 39 Manganese (Mn) 54 non-null object\n", + "dtypes: int64(2), object(38)\n", + "memory usage: 17.0+ KB\n" + ] + } + ], + "source": [ + "# Display information about the DataFrame, including the data types of each column and memory usage\n", + "fruits.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking duplicate values\n", + "fruits.duplicated().sum()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace \"-\" with NaN\n", + "fruits.replace(\"-\", float(\"nan\"), inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace commas with periods\n", + "columns_to_convert = fruits.columns.difference(['Name', 'Product group:'])\n", + "fruits[columns_to_convert] = fruits[columns_to_convert].replace(',', '.', regex=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert all columns except 'Name' and 'Product group' to float\n", + "fruits[columns_to_convert] = fruits[columns_to_convert].astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename columns to remove trailing colon\n", + "fruits.rename(columns=lambda x: x.split(':')[0].strip(), inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameEmotional valueHealth valueProduct groupEnergy (kcal)Energy (kJoule)WaterProteinFatSaturated fat...Potassium (K)Calcium (Ca)Phosphorus (P)Iron (Fe)Magnesium (Mg)Copper (Cu)Zinc (Zn)Selenium (Se)Iodine (I)Manganese (Mn)
0Apple, fresh8.4NaNFruit54.0229.084.30.40.00.0...120.04.010.00.25.00.100.04NaNNaNNaN
1Applesauce, canned, sweetened7.2NaNFruit81.0341.078.50.20.10.0...114.04.07.00.13.00.010.02NaNNaNNaN
2Apricot, canned, no skin5.9NaNFruit74.0310.080.30.50.10.0...196.011.015.00.75.00.050.10NaNNaNNaN
3Apricot, dried6.2NaNFruit274.01145.026.95.00.10.0...1400.080.0110.04.460.00.800.40NaNNaNNaN
4Apricot, fresh7.4NaNFruit60.0252.083.41.40.40.0...280.016.020.00.010.00.000.00NaNNaNNaN
\n", + "

5 rows × 40 columns

\n", + "
" + ], + "text/plain": [ + " Name Emotional value Health value Product group \\\n", + "0 Apple, fresh 8.4 NaN Fruit \n", + "1 Applesauce, canned, sweetened 7.2 NaN Fruit \n", + "2 Apricot, canned, no skin 5.9 NaN Fruit \n", + "3 Apricot, dried 6.2 NaN Fruit \n", + "4 Apricot, fresh 7.4 NaN Fruit \n", + "\n", + " Energy (kcal) Energy (kJoule) Water Protein Fat Saturated fat ... \\\n", + "0 54.0 229.0 84.3 0.4 0.0 0.0 ... \n", + "1 81.0 341.0 78.5 0.2 0.1 0.0 ... \n", + "2 74.0 310.0 80.3 0.5 0.1 0.0 ... \n", + "3 274.0 1145.0 26.9 5.0 0.1 0.0 ... \n", + "4 60.0 252.0 83.4 1.4 0.4 0.0 ... \n", + "\n", + " Potassium (K) Calcium (Ca) Phosphorus (P) Iron (Fe) Magnesium (Mg) \\\n", + "0 120.0 4.0 10.0 0.2 5.0 \n", + "1 114.0 4.0 7.0 0.1 3.0 \n", + "2 196.0 11.0 15.0 0.7 5.0 \n", + "3 1400.0 80.0 110.0 4.4 60.0 \n", + "4 280.0 16.0 20.0 0.0 10.0 \n", + "\n", + " Copper (Cu) Zinc (Zn) Selenium (Se) Iodine (I) Manganese (Mn) \n", + "0 0.10 0.04 NaN NaN NaN \n", + "1 0.01 0.02 NaN NaN NaN \n", + "2 0.05 0.10 NaN NaN NaN \n", + "3 0.80 0.40 NaN NaN NaN \n", + "4 0.00 0.00 NaN NaN NaN \n", + "\n", + "[5 rows x 40 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Final in the required format\n", + "fruits.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Write the DataFrame to a CSV file\n", + "fruits.to_csv('fruits_nutritional_facts_preprocessed.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ai-ml/friuts_nutritional_facts.csv 
b/ai-ml/friuts_nutritional_facts.csv new file mode 100644 index 00000000..4d44806c --- /dev/null +++ b/ai-ml/friuts_nutritional_facts.csv @@ -0,0 +1,55 @@ +Name,Emotional value:,Health value:,Product group:,Energy (kcal),Energy (kJoule),Water,Protein,Fat,Saturated fat,Monounsaturated fat,Polyunsaturated fat,Trans fat,Cholesterol,Carbohydrates,Sugars,Polysaccharides,Dietary fiber,Vitamin A (Retinol),Vitamin B1 (Thiamine),Vitamin B (Repulfvin),Vitamin B3 (Niacin),Vitamin B6 (Pyridoxine),Vitamin B11 (Folic acid),Vitamin B12 (Cobalamin),Vitamin C (Ascorbic Acid),Vitamin D (Calcifine),Vitamin E (Tocopherol),Vitamin K (Phyloquinone),Sodium (Na),Potassium (K),Calcium (Ca),Phosphorus (P),Iron (Fe),Magnesium (Mg),Copper (Cu),Zinc (Zn),Selenium (Se),Iodine (I),Manganese (Mn) +"Apple, fresh",8.4,-,Fruit,54,229,"84,3","0,4","0,0","0,0","0,0","0,0",-,"0,0","12,0","11,8",-,"2,3","0,00","0,02","0,01",-,"0,05",3,"0,00","10,0","0,0",-,-,2,120,"4,0","10,0","0,2","5,00","0,10","0,04",-,-,- +"Applesauce, canned, sweetened",7.2,-,Fruit,81,341,"78,5","0,2","0,1","0,0","0,0","0,0",-,"0,0","19,2","19,2",-,"1,0","0,00","0,02","0,01",-,"0,05",0,"0,00","2,0","0,0",-,-,3,114,"4,0","7,0","0,1","3,00","0,01","0,02",-,-,- +"Apricot, canned, no skin",5.9,-,Fruit,74,310,"80,3","0,5","0,1","0,0","0,0","0,0",-,"0,0","17,0","16,0",-,"1,1","0,10","0,02","0,02",-,"0,05",1,"0,00","4,0","0,0",-,-,13,196,"11,0","15,0","0,7","5,00","0,05","0,10",-,-,- +"Apricot, dried",6.2,-,Fruit,274,1145,"26,9","5,0","0,1","0,0","0,0","0,0",-,"0,0","58,5","55,8",-,"8,5","0,18","0,17","0,12",-,"0,17",7,"0,00","10,0","0,0",-,-,10,1400,"80,0","110,0","4,4","60,00","0,80","0,40",-,-,- +"Apricot, fresh",7.4,-,Fruit,60,252,"83,4","1,4","0,4","0,0","0,0","0,0",-,"0,0","11,5","7,7",-,"2,3","0,18","0,06","0,05",-,"0,06",4,"0,00","9,0","0,0",-,-,0,280,"16,0","20,0","0,0","10,00","0,00","0,00",-,-,- +"Bananas, 
fresh",8.0,-,Fruit,86,363,"76,8","1,2","0,2","0,0","0,0","0,1",-,"0,0","18,8","18,2",-,"2,0","0,01","0,04","0,06",-,"0,37",15,"0,00","12,0","0,0",-,-,1,400,"8,7","28,0","0,6","30,00","0,13","0,22",-,-,- +Black currant,7.3,-,Fruit,47,200,"85,6","1,2","0,2","0,0","0,0","0,0",-,"0,0","8,5","7,0",-,"3,5","0,12","0,03","0,04",-,"0,05",8,"0,00","150,0","0,0",-,-,5,190,"45,0","30,0","0,9","15,00","0,15","0,30",-,-,- +"Blackberries, raw",7.8,-,Fruit,58,243,"83,1","1,2","1,0","0,1","0,1","0,6",-,"0,0","8,5","6,8",-,"5,2","0,12","0,03","0,04",-,"0,05",20,"0,00","17,0","0,0",-,-,3,190,"45,0","30,0","0,9","30,00","0,70","0,00",-,-,- +Blueberries,6.7,-,Fruit,75,316,"77,1","0,6","0,2","0,0","0,0","0,0",-,"0,0","14,5","14,0",-,"6,6","0,02","0,02","0,02",-,"0,06",10,"0,00","12,0","0,0",-,-,4,60,"12,0","16,0","2,5","5,00","0,07","0,20",-,-,- +"Cherries, red, sour, fresh",5.8,-,Fruit,52,220,"84,9","1,0","0,0","0,0","0,0","0,0",-,"0,0","11,0","11,0",-,"2,1","0,00","0,02","0,02",-,"0,04",4,"0,00","10,0","0,0",-,-,2,190,"10,0","20,0","0,4","10,00","0,06","0,10",-,-,- +"Cherries, red, sweet, fresh",8.0,-,Fruit,54,228,"84,9","0,5","0,1","0,0","0,0","0,0",-,"0,0","12,0","11,5",-,"1,5","0,01","0,02","0,02",-,"0,03",4,"0,00","5,0","0,0",-,-,0,240,"10,0","30,0","0,4","10,00","0,10","0,10",-,-,- +"Cranberries, dried, sweetened",5.6,-,Fruit,353,1475,"9,1","1,2","0,0","0,0","0,0","0,0",-,"0,0","83,5","75,0",-,"5,2",-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,- +"Cranberries, fresh",6.0,-,Fruit,24,104,"91,0","0,5","0,0","0,0","0,0","0,0",-,"0,0","4,0","3,5",-,"3,5","0,00","0,03","0,02",-,"0,06",2,"0,00","17,0","0,0",-,-,1,80,"10,0","10,0","0,5","5,00","0,06","0,12",-,-,- +"Currants, red, fresh",6.8,-,Fruit,38,163,"85,1","1,0","0,0","0,0","0,0","0,0",-,"0,0","4,8","4,2",-,"8,1","0,00","0,07","0,02",-,"0,06",10,"0,00","10,0","0,0",-,-,1,300,"20,0","40,0","1,0","10,00","0,10","0,15",-,-,- +"Date, 
candied",5.0,-,Fruit,286,1195,"24,2","1,8","0,0","0,0","0,0","0,0",-,"0,0","65,0","63,5",-,"8,0","7,00","0,04","0,07",-,"0,13",4,"0,00","3,0","0,0",-,-,35,650,"46,0","57,0","1,9","50,00","0,30","0,30",-,-,- +"Date, fresh",6.3,-,Fruit,152,638,"58,8","2,2","0,0","0,0","0,0","0,0",-,"0,0","33,0","29,0",-,"5,0","0,00","0,05","0,06",-,"0,10",20,"0,00","15,0","0,0",-,-,10,400,"25,0","30,0","0,3","20,00","0,10","0,20",-,-,- +"Figs, dried, uncooked",5.7,-,Fruit,270,1128,"26,7","3,5","1,3","0,0","0,0","0,0",-,"0,0","54,0","54,0",-,"13,5","0,01","0,12","0,09",-,"0,40",7,"0,00","2,0","0,0",-,-,40,850,"190,0","110,0","3,3","75,00","0,40","0,90",-,-,- +"Figs, fresh",5.9,-,Fruit,64,269,"82,3","1,4","0,2","0,0","0,0","0,0",-,"0,0","13,0","13,0",-,"2,1","0,03","0,05","0,05",-,"0,11",-,"0,00","2,8","0,0",-,-,2,240,"55,0","30,0","0,6","17,00","0,07","0,25",-,-,- +"Gooseberries, fresh",7.0,-,Fruit,36,153,"90,0","1,0","0,0","0,0","0,0","0,0",-,"0,0","8,0","7,8",-,"0,0","0,01","0,02","0,01",-,"0,08",6,"0,00","30,0","0,0",-,-,5,200,"30,0","30,0","1,0","10,00","0,05","0,10",-,-,- +"Grapefruit, fresh",7.4,-,Fruit,45,191,"87,2","0,6","0,2","0,0","0,0","0,0",-,"0,0","9,5","7,5",-,"1,5","0,00","0,05","0,02",-,"0,03",15,"0,00","45,0","0,0",-,-,1,180,"20,0","17,0","0,3","10,00","0,04","0,17",-,-,- +"Grapes, blue, fresh",7.6,-,Fruit,53,224,"85,2","0,9","0,1","0,0","0,0","0,0",-,"0,0","11,5","11,0",-,"1,3","0,01","0,03","0,02",-,"0,10",1,"0,00","2,5","0,0",-,-,0,190,"20,0","15,0","0,4","12,00","0,10","0,08",-,-,- +"Grapes, green/white, fresh",7.9,-,Fruit,57,242,"84,5","0,5","0,0","0,0","0,0","0,0",-,"0,0","13,5","13,3",-,"0,5","0,01","0,04","0,02",-,"0,10",1,"0,00","2,0","0,0",-,-,1,180,"10,0","16,0","0,5","5,00","0,10","0,10",-,-,- +"Guava, fresh",5.2,-,Fruit,46,195,"85,8","0,9","0,5","0,0","0,0","0,0",-,"0,0","7,5","6,0",-,"4,3","0,07","0,04","0,04",-,"0,10",-,"0,00","220,0","0,0",-,-,5,230,"10,0","25,0","0,4","10,00","0,10","0,20",-,-,- +"Khaki-fruit, 
fresh",4.2,-,Fruit,78,327,"79,5","0,5","0,0","0,0","0,0","0,0",-,"0,0","18,5","18,3",-,"0,5","0,20","0,02","0,03",-,-,-,"0,00","20,0","0,0",-,-,1,180,"10,0","20,0","0,4","10,00","0,10","0,10",-,-,- +"Kiwi, fresh",8.4,-,Fruit,48,205,"86,8","1,0","0,2","0,0","0,0","0,0",-,"0,0","10,3","7,5",-,"0,7","0,06","0,02","0,05",-,"0,10",20,"0,00","65,0","0,0",-,-,4,300,"40,0","30,0","0,8","10,00","0,12","0,10",-,-,- +Kumkwat,4.7,-,Fruit,71,301,"80,5","1,0","0,0","0,0","0,0","0,0",-,"0,0","16,0","15,8",-,"1,5","0,01","0,15","0,06",-,"0,06",-,"0,00","60,0","0,0",-,-,5,180,"60,0","20,0","0,7","10,00","0,11","0,10",-,-,- +"Lemon, pealed, fresh",6.1,-,Fruit,36,151,"89,4","0,5","0,4","0,0","0,0","0,0",-,"0,0","6,5","3,0",-,"2,2","0,00","0,06","0,02",-,"0,04",8,"0,00","45,0","0,0",-,-,2,150,"10,0","20,0","0,1","10,00","0,05","0,10",-,-,- +"Limes, fresh",6.0,-,Fruit,32,135,"93,0","0,5","2,5",-,-,-,-,"0,0","2,0","1,9",-,-,-,"0,03","0,02",-,-,-,-,"45,0",-,-,-,2,80,"15,0","11,0","0,2",-,-,-,-,-,- +"Lychee, fresh",6.8,-,Fruit,81,342,"78,6","0,9","0,3","0,0","0,0","0,0",-,"0,0","18,0","16,8",-,"1,2","0,00","0,05","0,05",-,"0,04",-,"0,00","39,0","0,0",-,-,2,180,"10,0","35,0","0,4","20,00","0,15","0,18",-,-,- +"Mangos, fresh",7.4,-,Fruit,61,259,"83,3","1,1","0,3","0,0","0,0","0,1",-,"0,0","12,8","12,2",-,"1,5","0,10","0,05","0,05",-,"0,04",20,"0,00","36,0","0,0",-,-,5,190,"12,0","12,0","0,2","10,00","0,10","0,07",-,-,- +"Marmalade, orange",-,-,Fruit,292,1220,"27,2","0,8","0,5",-,-,-,-,"0,0","70,0",-,-,"0,5","0,00","0,01","0,01",-,-,-,-,"5,0",-,-,-,15,40,"30,0","15,0","0,5","5,00",-,-,-,-,- +"Melons, cantaloupe, fresh",7.5,-,Fruit,47,197,"84,3","0,9","0,3","0,0","0,0","0,0",-,"0,0","7,0","6,5",-,"6,5","1,00","0,04","0,05",-,"0,06",1,"0,00","27,0","0,0",-,-,15,200,"15,0","10,0","0,8","10,00","0,03","0,20",-,-,- +"Nectarines, 
fresh",7.8,-,Fruit,36,154,"89,2","1,0","0,0","0,0","0,0","0,0",-,"0,0","7,5","7,0",-,"1,3","0,03","0,06","0,04",-,"0,02",0,"0,00","5,0","0,0",-,-,1,175,"10,0","50,0","0,4","10,00",-,"0,10",-,-,- +"Oranges, fresh",8.5,-,Fruit,44,186,"85,2","1,0","0,2","0,0","0,0","0,0",-,"0,0","9,5","8,3",-,"2,1","0,01","0,08","0,04",-,"0,05",20,"0,00","50,0","0,0",-,-,1,170,"40,0","25,0","0,4","10,00","0,06","0,10",-,-,- +"Papayas, fresh",6.3,-,Fruit,45,191,"87,2","0,6","0,1","0,0","0,0","0,0",-,"0,0","9,9","7,5",-,"1,2","0,04","0,04","0,04",-,"0,04",1,"0,00","55,0","0,0",-,-,2,150,"20,0","16,0","0,3","10,00","0,08","0,30",-,-,- +"Passion fruit, purple, fresh",7.1,-,Fruit,120,504,"66,2","2,3","0,7","0,0","0,0","0,0",-,"0,0","22,3","12,0",-,"7,5","0,21","0,02","0,10",-,"0,10",-,"0,00","20,0","0,0",-,-,30,350,"15,0","60,0","1,6","30,00","0,10","0,80",-,-,- +"Peaches, fresh",8.0,-,Fruit,39,162,"87,5","0,8","0,1","0,0","0,0","0,0",-,"0,0","8,7","8,2",-,"0,0","0,01","0,01","0,02",-,"0,02",2,"0,00","9,0","0,0",-,-,1,205,"7,0","23,0","0,2","8,00","0,06","0,10",-,-,- +"Pears, canned, heavy syrup",5.7,-,Fruit,76,322,"79,1","0,3","0,1","0,0","0,0","0,0",-,"0,0","17,5","17,0",-,"2,0","0,00","0,01","0,02",-,"0,01",2,"0,00","2,0","0,0",-,-,6,65,"10,0","8,0","0,4","5,00","0,04","0,06",-,-,- +"Pears, fresh",8.4,-,Fruit,54,229,"84,4","0,5","0,3","0,0","0,0","0,0",-,"0,0","11,0","10,5",-,"2,8","0,00","0,01","0,02",-,"0,01",1,"0,00","3,0","0,0",-,-,6,65,"7,0","8,0","0,4","5,00","0,04","0,07",-,-,- +"Pineapple, fresh",8.4,-,Fruit,61,256,"83,4","0,5","0,2","0,0","0,0","0,0",-,"0,0","13,5","12,4",-,"1,4","0,00","0,06","0,02",-,"0,08",4,"0,00","23,0","0,0",-,-,2,170,"16,0","9,0","0,4","20,00","0,10","0,10",-,-,- +"PlantaIns, fresh",6.1,-,Fruit,137,575,"64,4","1,0","0,2","0,0","0,0","0,0",-,"0,0","31,5","24,0",-,"1,9","0,20","0,05","0,04",-,"0,40",10,"0,00","15,0","0,0",-,-,5,350,"5,0","30,0","1,0","30,00",-,"0,15",-,-,- +"Plums, dried, 
uncooked",5.8,-,Fruit,221,927,"36,0","2,0","0,0","0,0","0,0","0,0",-,"0,0","45,0","45,0",-,"16,0","0,03","0,30","0,06",-,"0,25",2,"0,00","0,0","0,0",-,-,10,800,"30,0","60,0","1,0","30,00","0,15","0,50",-,-,- +"Plums, fresh",6.4,-,Fruit,60,255,"83,6","0,8","0,6",-,-,-,-,-,"12,0","11,5",-,"2,0","0,06","0,02","0,03",-,"0,10",2,"0,00","9,5","0,0",-,-,0,200,"10,0","20,0","0,2","10,00","0,30","0,10",-,-,- +"Plums, mixed varieties, fresh",7.5,-,Fruit,56,235,"84,5","0,6","0,2","0,0","0,0","0,0",-,"0,0","12,0","7,6",-,"1,7","0,06","0,02","0,04",-,"0,04",2,"0,00","5,5","0,0",-,-,1,220,"15,0","20,0","0,5","10,00","0,06","0,08",-,-,- +"Pomegranate, fresh",6.0,-,Fruit,85,360,"77,3","1,0","0,7","0,2","0,1","0,2",-,"0,0","17,5","12,0",-,"2,5","0,01","0,05","0,02",-,"0,30",-,"0,00","6,5","0,0",-,-,3,200,"10,0","10,0","0,3","10,00","0,20","0,40",-,-,- +"Quinces, fresh",5.3,-,Fruit,69,291,"80,9","0,5","0,1","0,0","0,0","0,0",-,"0,0","15,5",-,-,"2,0","4,00",-,-,-,-,-,-,"15,0",-,-,-,4,-,"10,0",-,"0,7",-,-,-,-,-,- +"Raisins, mixed varieties",6.9,-,Fruit,329,1375,"13,0","2,0","0,0","0,0","0,0","0,0",-,"0,0","75,0","75,0",-,"9,0","0,00","0,07","0,03",-,"0,17",3,"0,00","0,0","0,0",-,-,20,650,"50,0","90,0","2,0","40,00","0,80","0,20",-,-,- +"Raisins, seedles",6.7,-,Fruit,291,1216,"24,5","2,5","0,1","0,0","0,0","0,0",-,"0,0","66,5","64,0",-,"5,4","0,00","0,12","0,05",-,"0,11",7,"0,00","1,0","0,0",-,-,20,800,"30,0","110,0","0,3","40,00","0,10","0,10",-,-,- +"Raspberries, fresh",7.3,-,Fruit,68,286,"76,0","1,2","0,5","0,1","0,1","0,3",-,"0,0","14,3","14,1",-,"7,0","0,00","0,02","0,05",-,"0,07",25,"0,00","25,0","0,0",-,-,0,170,"40,0","40,0","1,0","25,00","0,15","0,00",-,-,- +"Red berries, fresh",6.8,-,Fruit,51,213,"82,9","1,1","0,1","0,0","0,0","0,0",-,"0,0","7,9","6,0",-,"7,9","0,00","0,07","0,02",-,"0,07",10,"0,00","10,0","0,0",-,-,1,238,"29,0","27,0","1,0","12,00","0,10","0,20",-,-,- +"Rose hip, 
fresh",6.3,-,Fruit,95,395,"50,5","3,5","0,1",-,-,-,-,"0,0","19,5","16,0",-,-,-,-,-,-,-,-,-,"1250,0",-,-,-,150,290,"250,0","260,0","0,5",-,"1,50","1,00",-,-,- +"Strawberries, fresh",9.0,-,Fruit,36,154,"89,3","0,8","0,4","0,0","0,0","0,0",-,"0,0","6,5","5,4",-,"2,0","0,00","0,03","0,05",-,"0,06",65,"0,00","64,0","0,0",-,-,2,150,"25,0","30,0","0,9","15,00","0,12","0,12",-,-,- +"Tangerines, mandarin oranges, fresh",8.3,-,Fruit,49,209,"86,0","0,7","0,3","0,0","0,0","0,0",-,"0,0","10,1","1,7",-,"1,9","0,05","0,06","0,01",-,"0,03",10,"0,00","30,0","0,0",-,-,1,210,"33,0","20,0","0,0","7,00","0,00","0,12",-,-,- +"Watermelon, fresh",7.7,-,Fruit,37,159,"89,3","0,6","0,2","0,0","0,0","0,0",-,"0,0","7,8","7,5",-,"1,1","0,02","0,04","0,05",-,"0,07",1,"0,00","6,0","0,0",-,-,1,160,"10,0","10,0","0,4","10,00","0,00","0,10",-,-,- diff --git a/ai-ml/fruits_nutritional_facts_preprocessed.csv b/ai-ml/fruits_nutritional_facts_preprocessed.csv new file mode 100644 index 00000000..22fe136d --- /dev/null +++ b/ai-ml/fruits_nutritional_facts_preprocessed.csv @@ -0,0 +1,55 @@ +Name,Emotional value,Health value,Product group,Energy (kcal),Energy (kJoule),Water,Protein,Fat,Saturated fat,Monounsaturated fat,Polyunsaturated fat,Trans fat,Cholesterol,Carbohydrates,Sugars,Polysaccharides,Dietary fiber,Vitamin A (Retinol),Vitamin B1 (Thiamine),Vitamin B (Repulfvin),Vitamin B3 (Niacin),Vitamin B6 (Pyridoxine),Vitamin B11 (Folic acid),Vitamin B12 (Cobalamin),Vitamin C (Ascorbic Acid),Vitamin D (Calcifine),Vitamin E (Tocopherol),Vitamin K (Phyloquinone),Sodium (Na),Potassium (K),Calcium (Ca),Phosphorus (P),Iron (Fe),Magnesium (Mg),Copper (Cu),Zinc (Zn),Selenium (Se),Iodine (I),Manganese (Mn) +"Apple, fresh",8.4,,Fruit,54.0,229.0,84.3,0.4,0.0,0.0,0.0,0.0,,0.0,12.0,11.8,,2.3,0.0,0.02,0.01,,0.05,3.0,0.0,10.0,0.0,,,2.0,120.0,4.0,10.0,0.2,5.0,0.1,0.04,,, +"Applesauce, canned, 
sweetened",7.2,,Fruit,81.0,341.0,78.5,0.2,0.1,0.0,0.0,0.0,,0.0,19.2,19.2,,1.0,0.0,0.02,0.01,,0.05,0.0,0.0,2.0,0.0,,,3.0,114.0,4.0,7.0,0.1,3.0,0.01,0.02,,, +"Apricot, canned, no skin",5.9,,Fruit,74.0,310.0,80.3,0.5,0.1,0.0,0.0,0.0,,0.0,17.0,16.0,,1.1,0.1,0.02,0.02,,0.05,1.0,0.0,4.0,0.0,,,13.0,196.0,11.0,15.0,0.7,5.0,0.05,0.1,,, +"Apricot, dried",6.2,,Fruit,274.0,1145.0,26.9,5.0,0.1,0.0,0.0,0.0,,0.0,58.5,55.8,,8.5,0.18,0.17,0.12,,0.17,7.0,0.0,10.0,0.0,,,10.0,1400.0,80.0,110.0,4.4,60.0,0.8,0.4,,, +"Apricot, fresh",7.4,,Fruit,60.0,252.0,83.4,1.4,0.4,0.0,0.0,0.0,,0.0,11.5,7.7,,2.3,0.18,0.06,0.05,,0.06,4.0,0.0,9.0,0.0,,,0.0,280.0,16.0,20.0,0.0,10.0,0.0,0.0,,, +"Bananas, fresh",8.0,,Fruit,86.0,363.0,76.8,1.2,0.2,0.0,0.0,0.1,,0.0,18.8,18.2,,2.0,0.01,0.04,0.06,,0.37,15.0,0.0,12.0,0.0,,,1.0,400.0,8.7,28.0,0.6,30.0,0.13,0.22,,, +Black currant,7.3,,Fruit,47.0,200.0,85.6,1.2,0.2,0.0,0.0,0.0,,0.0,8.5,7.0,,3.5,0.12,0.03,0.04,,0.05,8.0,0.0,150.0,0.0,,,5.0,190.0,45.0,30.0,0.9,15.0,0.15,0.3,,, +"Blackberries, raw",7.8,,Fruit,58.0,243.0,83.1,1.2,1.0,0.1,0.1,0.6,,0.0,8.5,6.8,,5.2,0.12,0.03,0.04,,0.05,20.0,0.0,17.0,0.0,,,3.0,190.0,45.0,30.0,0.9,30.0,0.7,0.0,,, +Blueberries,6.7,,Fruit,75.0,316.0,77.1,0.6,0.2,0.0,0.0,0.0,,0.0,14.5,14.0,,6.6,0.02,0.02,0.02,,0.06,10.0,0.0,12.0,0.0,,,4.0,60.0,12.0,16.0,2.5,5.0,0.07,0.2,,, +"Cherries, red, sour, fresh",5.8,,Fruit,52.0,220.0,84.9,1.0,0.0,0.0,0.0,0.0,,0.0,11.0,11.0,,2.1,0.0,0.02,0.02,,0.04,4.0,0.0,10.0,0.0,,,2.0,190.0,10.0,20.0,0.4,10.0,0.06,0.1,,, +"Cherries, red, sweet, fresh",8.0,,Fruit,54.0,228.0,84.9,0.5,0.1,0.0,0.0,0.0,,0.0,12.0,11.5,,1.5,0.01,0.02,0.02,,0.03,4.0,0.0,5.0,0.0,,,0.0,240.0,10.0,30.0,0.4,10.0,0.1,0.1,,, +"Cranberries, dried, sweetened",5.6,,Fruit,353.0,1475.0,9.1,1.2,0.0,0.0,0.0,0.0,,0.0,83.5,75.0,,5.2,,,,,,,,,,,,,,,,,,,,,, +"Cranberries, fresh",6.0,,Fruit,24.0,104.0,91.0,0.5,0.0,0.0,0.0,0.0,,0.0,4.0,3.5,,3.5,0.0,0.03,0.02,,0.06,2.0,0.0,17.0,0.0,,,1.0,80.0,10.0,10.0,0.5,5.0,0.06,0.12,,, +"Currants, red, 
fresh",6.8,,Fruit,38.0,163.0,85.1,1.0,0.0,0.0,0.0,0.0,,0.0,4.8,4.2,,8.1,0.0,0.07,0.02,,0.06,10.0,0.0,10.0,0.0,,,1.0,300.0,20.0,40.0,1.0,10.0,0.1,0.15,,, +"Date, candied",5.0,,Fruit,286.0,1195.0,24.2,1.8,0.0,0.0,0.0,0.0,,0.0,65.0,63.5,,8.0,7.0,0.04,0.07,,0.13,4.0,0.0,3.0,0.0,,,35.0,650.0,46.0,57.0,1.9,50.0,0.3,0.3,,, +"Date, fresh",6.3,,Fruit,152.0,638.0,58.8,2.2,0.0,0.0,0.0,0.0,,0.0,33.0,29.0,,5.0,0.0,0.05,0.06,,0.1,20.0,0.0,15.0,0.0,,,10.0,400.0,25.0,30.0,0.3,20.0,0.1,0.2,,, +"Figs, dried, uncooked",5.7,,Fruit,270.0,1128.0,26.7,3.5,1.3,0.0,0.0,0.0,,0.0,54.0,54.0,,13.5,0.01,0.12,0.09,,0.4,7.0,0.0,2.0,0.0,,,40.0,850.0,190.0,110.0,3.3,75.0,0.4,0.9,,, +"Figs, fresh",5.9,,Fruit,64.0,269.0,82.3,1.4,0.2,0.0,0.0,0.0,,0.0,13.0,13.0,,2.1,0.03,0.05,0.05,,0.11,,0.0,2.8,0.0,,,2.0,240.0,55.0,30.0,0.6,17.0,0.07,0.25,,, +"Gooseberries, fresh",7.0,,Fruit,36.0,153.0,90.0,1.0,0.0,0.0,0.0,0.0,,0.0,8.0,7.8,,0.0,0.01,0.02,0.01,,0.08,6.0,0.0,30.0,0.0,,,5.0,200.0,30.0,30.0,1.0,10.0,0.05,0.1,,, +"Grapefruit, fresh",7.4,,Fruit,45.0,191.0,87.2,0.6,0.2,0.0,0.0,0.0,,0.0,9.5,7.5,,1.5,0.0,0.05,0.02,,0.03,15.0,0.0,45.0,0.0,,,1.0,180.0,20.0,17.0,0.3,10.0,0.04,0.17,,, +"Grapes, blue, fresh",7.6,,Fruit,53.0,224.0,85.2,0.9,0.1,0.0,0.0,0.0,,0.0,11.5,11.0,,1.3,0.01,0.03,0.02,,0.1,1.0,0.0,2.5,0.0,,,0.0,190.0,20.0,15.0,0.4,12.0,0.1,0.08,,, +"Grapes, green/white, fresh",7.9,,Fruit,57.0,242.0,84.5,0.5,0.0,0.0,0.0,0.0,,0.0,13.5,13.3,,0.5,0.01,0.04,0.02,,0.1,1.0,0.0,2.0,0.0,,,1.0,180.0,10.0,16.0,0.5,5.0,0.1,0.1,,, +"Guava, fresh",5.2,,Fruit,46.0,195.0,85.8,0.9,0.5,0.0,0.0,0.0,,0.0,7.5,6.0,,4.3,0.07,0.04,0.04,,0.1,,0.0,220.0,0.0,,,5.0,230.0,10.0,25.0,0.4,10.0,0.1,0.2,,, +"Khaki-fruit, fresh",4.2,,Fruit,78.0,327.0,79.5,0.5,0.0,0.0,0.0,0.0,,0.0,18.5,18.3,,0.5,0.2,0.02,0.03,,,,0.0,20.0,0.0,,,1.0,180.0,10.0,20.0,0.4,10.0,0.1,0.1,,, +"Kiwi, fresh",8.4,,Fruit,48.0,205.0,86.8,1.0,0.2,0.0,0.0,0.0,,0.0,10.3,7.5,,0.7,0.06,0.02,0.05,,0.1,20.0,0.0,65.0,0.0,,,4.0,300.0,40.0,30.0,0.8,10.0,0.12,0.1,,, 
+Kumkwat,4.7,,Fruit,71.0,301.0,80.5,1.0,0.0,0.0,0.0,0.0,,0.0,16.0,15.8,,1.5,0.01,0.15,0.06,,0.06,,0.0,60.0,0.0,,,5.0,180.0,60.0,20.0,0.7,10.0,0.11,0.1,,, +"Lemon, pealed, fresh",6.1,,Fruit,36.0,151.0,89.4,0.5,0.4,0.0,0.0,0.0,,0.0,6.5,3.0,,2.2,0.0,0.06,0.02,,0.04,8.0,0.0,45.0,0.0,,,2.0,150.0,10.0,20.0,0.1,10.0,0.05,0.1,,, +"Limes, fresh",6.0,,Fruit,32.0,135.0,93.0,0.5,2.5,,,,,0.0,2.0,1.9,,,,0.03,0.02,,,,,45.0,,,,2.0,80.0,15.0,11.0,0.2,,,,,, +"Lychee, fresh",6.8,,Fruit,81.0,342.0,78.6,0.9,0.3,0.0,0.0,0.0,,0.0,18.0,16.8,,1.2,0.0,0.05,0.05,,0.04,,0.0,39.0,0.0,,,2.0,180.0,10.0,35.0,0.4,20.0,0.15,0.18,,, +"Mangos, fresh",7.4,,Fruit,61.0,259.0,83.3,1.1,0.3,0.0,0.0,0.1,,0.0,12.8,12.2,,1.5,0.1,0.05,0.05,,0.04,20.0,0.0,36.0,0.0,,,5.0,190.0,12.0,12.0,0.2,10.0,0.1,0.07,,, +"Marmalade, orange",,,Fruit,292.0,1220.0,27.2,0.8,0.5,,,,,0.0,70.0,,,0.5,0.0,0.01,0.01,,,,,5.0,,,,15.0,40.0,30.0,15.0,0.5,5.0,,,,, +"Melons, cantaloupe, fresh",7.5,,Fruit,47.0,197.0,84.3,0.9,0.3,0.0,0.0,0.0,,0.0,7.0,6.5,,6.5,1.0,0.04,0.05,,0.06,1.0,0.0,27.0,0.0,,,15.0,200.0,15.0,10.0,0.8,10.0,0.03,0.2,,, +"Nectarines, fresh",7.8,,Fruit,36.0,154.0,89.2,1.0,0.0,0.0,0.0,0.0,,0.0,7.5,7.0,,1.3,0.03,0.06,0.04,,0.02,0.0,0.0,5.0,0.0,,,1.0,175.0,10.0,50.0,0.4,10.0,,0.1,,, +"Oranges, fresh",8.5,,Fruit,44.0,186.0,85.2,1.0,0.2,0.0,0.0,0.0,,0.0,9.5,8.3,,2.1,0.01,0.08,0.04,,0.05,20.0,0.0,50.0,0.0,,,1.0,170.0,40.0,25.0,0.4,10.0,0.06,0.1,,, +"Papayas, fresh",6.3,,Fruit,45.0,191.0,87.2,0.6,0.1,0.0,0.0,0.0,,0.0,9.9,7.5,,1.2,0.04,0.04,0.04,,0.04,1.0,0.0,55.0,0.0,,,2.0,150.0,20.0,16.0,0.3,10.0,0.08,0.3,,, +"Passion fruit, purple, fresh",7.1,,Fruit,120.0,504.0,66.2,2.3,0.7,0.0,0.0,0.0,,0.0,22.3,12.0,,7.5,0.21,0.02,0.1,,0.1,,0.0,20.0,0.0,,,30.0,350.0,15.0,60.0,1.6,30.0,0.1,0.8,,, +"Peaches, fresh",8.0,,Fruit,39.0,162.0,87.5,0.8,0.1,0.0,0.0,0.0,,0.0,8.7,8.2,,0.0,0.01,0.01,0.02,,0.02,2.0,0.0,9.0,0.0,,,1.0,205.0,7.0,23.0,0.2,8.0,0.06,0.1,,, +"Pears, canned, heavy 
syrup",5.7,,Fruit,76.0,322.0,79.1,0.3,0.1,0.0,0.0,0.0,,0.0,17.5,17.0,,2.0,0.0,0.01,0.02,,0.01,2.0,0.0,2.0,0.0,,,6.0,65.0,10.0,8.0,0.4,5.0,0.04,0.06,,, +"Pears, fresh",8.4,,Fruit,54.0,229.0,84.4,0.5,0.3,0.0,0.0,0.0,,0.0,11.0,10.5,,2.8,0.0,0.01,0.02,,0.01,1.0,0.0,3.0,0.0,,,6.0,65.0,7.0,8.0,0.4,5.0,0.04,0.07,,, +"Pineapple, fresh",8.4,,Fruit,61.0,256.0,83.4,0.5,0.2,0.0,0.0,0.0,,0.0,13.5,12.4,,1.4,0.0,0.06,0.02,,0.08,4.0,0.0,23.0,0.0,,,2.0,170.0,16.0,9.0,0.4,20.0,0.1,0.1,,, +"PlantaIns, fresh",6.1,,Fruit,137.0,575.0,64.4,1.0,0.2,0.0,0.0,0.0,,0.0,31.5,24.0,,1.9,0.2,0.05,0.04,,0.4,10.0,0.0,15.0,0.0,,,5.0,350.0,5.0,30.0,1.0,30.0,,0.15,,, +"Plums, dried, uncooked",5.8,,Fruit,221.0,927.0,36.0,2.0,0.0,0.0,0.0,0.0,,0.0,45.0,45.0,,16.0,0.03,0.3,0.06,,0.25,2.0,0.0,0.0,0.0,,,10.0,800.0,30.0,60.0,1.0,30.0,0.15,0.5,,, +"Plums, fresh",6.4,,Fruit,60.0,255.0,83.6,0.8,0.6,,,,,,12.0,11.5,,2.0,0.06,0.02,0.03,,0.1,2.0,0.0,9.5,0.0,,,0.0,200.0,10.0,20.0,0.2,10.0,0.3,0.1,,, +"Plums, mixed varieties, fresh",7.5,,Fruit,56.0,235.0,84.5,0.6,0.2,0.0,0.0,0.0,,0.0,12.0,7.6,,1.7,0.06,0.02,0.04,,0.04,2.0,0.0,5.5,0.0,,,1.0,220.0,15.0,20.0,0.5,10.0,0.06,0.08,,, +"Pomegranate, fresh",6.0,,Fruit,85.0,360.0,77.3,1.0,0.7,0.2,0.1,0.2,,0.0,17.5,12.0,,2.5,0.01,0.05,0.02,,0.3,,0.0,6.5,0.0,,,3.0,200.0,10.0,10.0,0.3,10.0,0.2,0.4,,, +"Quinces, fresh",5.3,,Fruit,69.0,291.0,80.9,0.5,0.1,0.0,0.0,0.0,,0.0,15.5,,,2.0,4.0,,,,,,,15.0,,,,4.0,,10.0,,0.7,,,,,, +"Raisins, mixed varieties",6.9,,Fruit,329.0,1375.0,13.0,2.0,0.0,0.0,0.0,0.0,,0.0,75.0,75.0,,9.0,0.0,0.07,0.03,,0.17,3.0,0.0,0.0,0.0,,,20.0,650.0,50.0,90.0,2.0,40.0,0.8,0.2,,, +"Raisins, seedles",6.7,,Fruit,291.0,1216.0,24.5,2.5,0.1,0.0,0.0,0.0,,0.0,66.5,64.0,,5.4,0.0,0.12,0.05,,0.11,7.0,0.0,1.0,0.0,,,20.0,800.0,30.0,110.0,0.3,40.0,0.1,0.1,,, +"Raspberries, fresh",7.3,,Fruit,68.0,286.0,76.0,1.2,0.5,0.1,0.1,0.3,,0.0,14.3,14.1,,7.0,0.0,0.02,0.05,,0.07,25.0,0.0,25.0,0.0,,,0.0,170.0,40.0,40.0,1.0,25.0,0.15,0.0,,, +"Red berries, 
fresh",6.8,,Fruit,51.0,213.0,82.9,1.1,0.1,0.0,0.0,0.0,,0.0,7.9,6.0,,7.9,0.0,0.07,0.02,,0.07,10.0,0.0,10.0,0.0,,,1.0,238.0,29.0,27.0,1.0,12.0,0.1,0.2,,, +"Rose hip, fresh",6.3,,Fruit,95.0,395.0,50.5,3.5,0.1,,,,,0.0,19.5,16.0,,,,,,,,,,1250.0,,,,150.0,290.0,250.0,260.0,0.5,,1.5,1.0,,, +"Strawberries, fresh",9.0,,Fruit,36.0,154.0,89.3,0.8,0.4,0.0,0.0,0.0,,0.0,6.5,5.4,,2.0,0.0,0.03,0.05,,0.06,65.0,0.0,64.0,0.0,,,2.0,150.0,25.0,30.0,0.9,15.0,0.12,0.12,,, +"Tangerines, mandarin oranges, fresh",8.3,,Fruit,49.0,209.0,86.0,0.7,0.3,0.0,0.0,0.0,,0.0,10.1,1.7,,1.9,0.05,0.06,0.01,,0.03,10.0,0.0,30.0,0.0,,,1.0,210.0,33.0,20.0,0.0,7.0,0.0,0.12,,, +"Watermelon, fresh",7.7,,Fruit,37.0,159.0,89.3,0.6,0.2,0.0,0.0,0.0,,0.0,7.8,7.5,,1.1,0.02,0.04,0.05,,0.07,1.0,0.0,6.0,0.0,,,1.0,160.0,10.0,10.0,0.4,10.0,0.0,0.1,,,