Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions src/snippets/python/first_iteration.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

cuisines: pd.DataFrame = pd.read_csv("./cuisines.csv", index_col=0)
cuisines: pd.DataFrame = pd.read_csv("./cuisines.csv")

indian_cuisines: pd.DataFrame = cuisines[cuisines["cuisine"] == "indian"]

ingredient_counts_series: pd.Series = indian_cuisines.T.drop(["cuisine"]).sum(axis=1)
indian_cuisine_ingredient_counts: pd.Series = indian_cuisines.groupby("ingredient").size()

indian_cuisine_counts: pd.DataFrame = ingredient_counts_series.to_frame("value")
sorted_ingredients: pd.Series = indian_cuisine_ingredient_counts.sort_values(ascending=False)

indian_cuisine_present_ingredients: pd.DataFrame = indian_cuisine_counts[
indian_cuisine_counts["value"] != 0
]

sorted_indian_cuisine_ingredients: pd.DataFrame = indian_cuisine_present_ingredients.sort_values(
by="value", ascending=False
)

sorted_indian_cuisine_ingredients.head(10).plot.barh(
title="Top 10 Most Common Indian Ingredients"
)
sorted_ingredients.head(10).plot.barh(title="Top 10 Most Common Indian Ingredients")
plt.xlabel("Count")
plt.ylabel("Ingredient")
plt.show()
plt.gca().invert_yaxis()
plt.show()
31 changes: 10 additions & 21 deletions src/snippets/python/second_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

def main() -> None:
# Load the dataset
cuisines_df: pd.DataFrame = pd.read_csv("./cuisines.csv", index_col=0)
cuisines_df: pd.DataFrame = pd.read_csv("./cuisines.csv")

# Get the list of unique cuisines
target_cuisines: np.ndarray[str] = cuisines_df["cuisine"].unique()

# For each cuisine, process and plot ingredient data
for cuisine in target_cuisines:
sorted_ingredients: pd.DataFrame = get_sorted_cuisine_ingredients(
sorted_ingredients: pd.Series = get_sorted_cuisine_ingredients(
cuisines_df, cuisine
)
plot_cuisine_ingredients(sorted_ingredients, cuisine)
Expand All @@ -21,35 +21,24 @@ def main() -> None:
def get_sorted_cuisine_ingredients(
df: pd.DataFrame,
cuisine_name: str,
) -> pd.DataFrame:
) -> pd.Series:
"""
Filters a DataFrame by cuisine and returns a sorted DataFrame of ingredient counts.
Filters a DataFrame by cuisine and returns a sorted Series of ingredient counts.
"""
# Filter rows matching the given cuisine (case-insensitive)
filtered = df[df["cuisine"].str.lower() == cuisine_name.lower()]
filtered: pd.DataFrame = df[df["cuisine"].str.lower() == cuisine_name.lower()]

# Sum the ingredient counts, dropping the 'cuisine' column
ingredient_totals: pd.Series = (
filtered
.T
.drop(["cuisine"])
.sum(axis=1)
)

# Convert to DataFrame for plotting
counts_df: pd.DataFrame = ingredient_totals.to_frame(name="value")

# Keep only ingredients that are actually used
present_ingredients: pd.DataFrame = counts_df[counts_df["value"] != 0]
# Count how many recipes use each ingredient
ingredient_counts: pd.Series = filtered.groupby("ingredient").size()

# Sort by frequency
sorted_ingredients = present_ingredients.sort_values(by="value", ascending=False)
sorted_ingredients: pd.Series = ingredient_counts.sort_values(ascending=False)

return sorted_ingredients


def plot_cuisine_ingredients(
df_to_plot: pd.DataFrame, cuisine_name: str, top_n: int = 10
ingredients: pd.Series, cuisine_name: str, top_n: int = 10
) -> None:
"""
Plots the top N most common ingredients for a given cuisine.
Expand All @@ -58,7 +47,7 @@ def plot_cuisine_ingredients(
title: str = f"Top {top_n} Most Common {cuisine_name.capitalize()} Ingredients"

# Plot horizontal bar chart
df_to_plot.head(top_n).plot.barh(title=title)
ingredients.head(top_n).plot.barh(title=title)
plt.xlabel("Count")
plt.ylabel("Ingredient")
plt.gca().invert_yaxis()
Expand Down