diff --git a/pornhub_lineplot_over_years.py b/pornhub_lineplot_over_years.py
new file mode 100644
index 0000000..8558a0a
--- /dev/null
+++ b/pornhub_lineplot_over_years.py
@@ -0,0 +1,83 @@
+"""Bar chart of how one year's views are spread over its most-viewed tags.
+
+Reads 'porn-with-dates-2022.csv' (columns used: 'date', 'categories', 'views'),
+takes the most recent year present in 'date', prints the ten tags with the most
+views in that year and plots each tag's share of the year's total views,
+followed by an 'Other' bar for videos that carry none of those ten tags.
+
+A video with several tags adds its full view count to every one of them, so
+the shares are not mutually exclusive and can sum to more than 100 %.
+
+NOTE(review): total_views_over_years_pornhub.py was added in the same change
+as an identical copy of this script; consider keeping only one of them.
+"""
+import ast
+
+import pandas as pd
+from collections import Counter
+import matplotlib.pyplot as plt
+
+# Read the CSV file
+df = pd.read_csv('porn-with-dates-2022.csv')
+
+# Convert the 'date' column to datetime
+df['date'] = pd.to_datetime(df['date'])
+
+# Determine the latest year in the dataset
+latest_year = int(df['date'].dt.year.max())
+
+# Filter data for the latest year
+df_latest_year = df[df['date'].dt.year == latest_year]
+
+# Get the total number of views for the latest year
+total_views_latest_year = df_latest_year['views'].sum()
+
+# Parse every 'categories' cell (a list literal stored as text) exactly once.
+# ast.literal_eval accepts only Python literals, so unlike eval() a crafted
+# cell in the CSV cannot run arbitrary code.
+tags_per_video = df_latest_year['categories'].apply(ast.literal_eval)
+
+# For each tag, sum the views of every video that carries it
+tag_views_latest_year = Counter()
+for tags, views in zip(tags_per_video, df_latest_year['views']):
+    for tag in tags:
+        tag_views_latest_year[tag] += views
+
+# Rank the tags by total views, highest first, and keep the ten largest
+tag_views_latest_year = pd.Series(dict(tag_views_latest_year)).sort_values(ascending=False)
+top_tags_latest_year_raw = tag_views_latest_year.nlargest(10)
+
+# Print distribution of views among the top tags in the latest year
+print(f"Top 10 tags in {latest_year}:")
+top_tags = []
+top_views = []
+
+# Iterate over (tag, views) pairs in the top tags Series
+for tag, views in top_tags_latest_year_raw.items():
+    percentage = (views / total_views_latest_year) * 100
+    print(f"{tag}: {views} views ({percentage:.2f}% of total)")
+    top_tags.append(tag)
+    top_views.append(percentage)
+
+# 'Other' = videos with none of the top tags. Membership is tested against the
+# parsed tag lists: matching the raw text with str.contains() would treat each
+# tag as a regular expression and as a substring, so a top tag would also
+# "match" any longer tag that merely contains it.
+top_tag_set = set(top_tags_latest_year_raw.index)
+has_top_tag = tags_per_video.apply(lambda tags: not top_tag_set.isdisjoint(tags)).astype(bool)
+other_views = df_latest_year.loc[~has_top_tag, 'views'].sum()
+
+top_tags.append('Other')
+percentage_other = (other_views / total_views_latest_year) * 100
+top_views.append(percentage_other)
+print(f"Other: {other_views} views ({percentage_other:.2f}% of total)")
+
+# Plot distribution of views among the top tags in the latest year
+plt.figure(figsize=(10, 6))
+plt.bar(top_tags, top_views)
+plt.xlabel('Tags')
+plt.ylabel('Number of Views (%)')
+plt.title(f'Distribution of Views Among Top Tags in {latest_year}')
+plt.xticks(rotation=45)
+plt.tight_layout()
+plt.show()
diff --git a/pornhub_tags_regression_over_years.py b/pornhub_tags_regression_over_years.py
new file mode 100644
index 0000000..89e5472
--- /dev/null
+++ b/pornhub_tags_regression_over_years.py
@@ -0,0 +1,90 @@
+"""Find tags whose share of yearly views shows a significant linear trend.
+
+Reads 'porn-with-dates-2022.csv' (columns used: 'date', 'categories', 'views').
+For every tag, the tag's fraction of each year's total views is regressed on
+the year with scipy.stats.linregress. Tags whose p-value is below 0.05 are
+written to 'significant_tags.txt' (tab-delimited) and printed.
+
+NOTE(review): the 0.05 threshold is applied to every tag separately, without
+any correction for testing many tags at once.
+"""
+import ast
+
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+import numpy as np
+from scipy.stats import linregress
+
+# Read the CSV file
+df = pd.read_csv('porn-with-dates-2022.csv')
+
+# Convert the 'date' column to datetime
+df['date'] = pd.to_datetime(df['date'])
+
+# Year of every row, computed once instead of once per (tag, year) pair
+row_years = df['date'].dt.year
+
+# Parse every 'categories' cell (a list literal stored as text) exactly once.
+# ast.literal_eval accepts only Python literals, so unlike eval() a crafted
+# cell in the CSV cannot run arbitrary code. Duplicates inside one cell are
+# dropped so that a video counts once per tag, as a membership test would.
+row_tags = df['categories'].apply(lambda text: list(dict.fromkeys(ast.literal_eval(text))))
+
+# Determine the unique tags in the dataset
+unique_tags = set()
+for tags in row_tags:
+    unique_tags.update(tags)
+
+# Determine the unique years in the dataset (rows without a date are ignored)
+unique_years = sorted(row_years.dropna().unique())
+
+# With fewer than three yearly data points a fitted line has no residual
+# degrees of freedom, so its p-value says nothing about a trend.
+if len(unique_years) < 3:
+    raise SystemExit('Need at least three distinct years to test for a trend.')
+
+# Total views per year; identical for every tag, so computed outside the tag loop
+total_views_by_year = df['views'].groupby(row_years).sum().to_dict()
+
+# Views per (tag, year): one row per (video, tag) pair, then a single group-by
+views_by_tag_and_year = (
+    pd.DataFrame({'tag': row_tags, 'year': row_years, 'views': df['views']})
+    .explode('tag')
+    .dropna(subset=['tag', 'year'])
+    .groupby(['tag', 'year'])['views']
+    .sum()
+    .to_dict()
+)
+
+# Initialize a dictionary to store the regression statistics for each tag
+tag_stats = {}
+
+# Iterate over each tag (sorted so the order of the output is reproducible)
+for tag in sorted(unique_tags, key=str):
+    # Fraction of each year's views that went to videos carrying this tag
+    normalized_views = []
+    for year in unique_years:
+        # Print out progress for the researchers own sanity
+        print(f'Running on tag "{tag}" in year {year}')
+        total_year_views = total_views_by_year.get(year, 0)
+        tag_year_views = views_by_tag_and_year.get((tag, year), 0)
+        normalized_views.append(tag_year_views / total_year_views if total_year_views != 0 else 0)
+
+    # Perform linear regression on years and normalized views
+    slope, _, r_value, p_value, _ = linregress(unique_years, normalized_views)
+
+    # Store the statistics of linear regression for the current tag
+    tag_stats[tag] = {'slope': slope, 'r_value': r_value, 'p_value': p_value}
+
+# Save the list of significant tags along with their statistics to a tab-delimited file
+significant_tags = []
+with open('significant_tags.txt', 'w', encoding='utf-8') as file:
+    file.write('Tag\tSlope\tR Value\tP Value\n')
+    for tag, stats in tag_stats.items():
+        if stats['p_value'] < 0.05:
+            file.write(f"{tag}\t{stats['slope']}\t{stats['r_value']}\t{stats['p_value']}\n")
+            significant_tags.append(tag)
+
+print("Tags with significant changes in view count over years:")
+for tag in significant_tags:
+    print(tag)
diff --git a/total_views_over_years_pornhub.py b/total_views_over_years_pornhub.py
new file mode 100644
index 0000000..8558a0a
--- /dev/null
+++ b/total_views_over_years_pornhub.py
@@ -0,0 +1,83 @@
+"""Bar chart of how one year's views are spread over its most-viewed tags.
+
+Reads 'porn-with-dates-2022.csv' (columns used: 'date', 'categories', 'views'),
+takes the most recent year present in 'date', prints the ten tags with the most
+views in that year and plots each tag's share of the year's total views,
+followed by an 'Other' bar for videos that carry none of those ten tags.
+
+A video with several tags adds its full view count to every one of them, so
+the shares are not mutually exclusive and can sum to more than 100 %.
+
+NOTE(review): this script was added in the same change as an identical copy
+of pornhub_lineplot_over_years.py; consider keeping only one of them.
+"""
+import ast
+
+import pandas as pd
+from collections import Counter
+import matplotlib.pyplot as plt
+
+# Read the CSV file
+df = pd.read_csv('porn-with-dates-2022.csv')
+
+# Convert the 'date' column to datetime
+df['date'] = pd.to_datetime(df['date'])
+
+# Determine the latest year in the dataset
+latest_year = int(df['date'].dt.year.max())
+
+# Filter data for the latest year
+df_latest_year = df[df['date'].dt.year == latest_year]
+
+# Get the total number of views for the latest year
+total_views_latest_year = df_latest_year['views'].sum()
+
+# Parse every 'categories' cell (a list literal stored as text) exactly once.
+# ast.literal_eval accepts only Python literals, so unlike eval() a crafted
+# cell in the CSV cannot run arbitrary code.
+tags_per_video = df_latest_year['categories'].apply(ast.literal_eval)
+
+# For each tag, sum the views of every video that carries it
+tag_views_latest_year = Counter()
+for tags, views in zip(tags_per_video, df_latest_year['views']):
+    for tag in tags:
+        tag_views_latest_year[tag] += views
+
+# Rank the tags by total views, highest first, and keep the ten largest
+tag_views_latest_year = pd.Series(dict(tag_views_latest_year)).sort_values(ascending=False)
+top_tags_latest_year_raw = tag_views_latest_year.nlargest(10)
+
+# Print distribution of views among the top tags in the latest year
+print(f"Top 10 tags in {latest_year}:")
+top_tags = []
+top_views = []
+
+# Iterate over (tag, views) pairs in the top tags Series
+for tag, views in top_tags_latest_year_raw.items():
+    percentage = (views / total_views_latest_year) * 100
+    print(f"{tag}: {views} views ({percentage:.2f}% of total)")
+    top_tags.append(tag)
+    top_views.append(percentage)
+
+# 'Other' = videos with none of the top tags. Membership is tested against the
+# parsed tag lists: matching the raw text with str.contains() would treat each
+# tag as a regular expression and as a substring, so a top tag would also
+# "match" any longer tag that merely contains it.
+top_tag_set = set(top_tags_latest_year_raw.index)
+has_top_tag = tags_per_video.apply(lambda tags: not top_tag_set.isdisjoint(tags)).astype(bool)
+other_views = df_latest_year.loc[~has_top_tag, 'views'].sum()
+
+top_tags.append('Other')
+percentage_other = (other_views / total_views_latest_year) * 100
+top_views.append(percentage_other)
+print(f"Other: {other_views} views ({percentage_other:.2f}% of total)")
+
+# Plot distribution of views among the top tags in the latest year
+plt.figure(figsize=(10, 6))
+plt.bar(top_tags, top_views)
+plt.xlabel('Tags')
+plt.ylabel('Number of Views (%)')
+plt.title(f'Distribution of Views Among Top Tags in {latest_year}')
+plt.xticks(rotation=45)
+plt.tight_layout()
+plt.show()
diff --git a/total_views_over_years_xhamster.py b/total_views_over_years_xhamster.py
new file mode 100644
index 0000000..4dcfa68
--- /dev/null
+++ b/total_views_over_years_xhamster.py
@@ -0,0 +1,80 @@
+"""Bar chart of how one year's views are spread over its most-viewed tags.
+
+Reads 'xhamster.csv' (columns used: 'upload_date', 'channels', 'nb_views'),
+takes the most recent year present in 'upload_date', prints the ten tags with
+the most views in that year and plots each tag's share of the year's total
+views, followed by an 'Other' bar for videos that carry none of those ten tags.
+
+A video with several tags adds its full view count to every one of them, so
+the shares are not mutually exclusive and can sum to more than 100 %.
+"""
+import ast
+
+import pandas as pd
+from collections import Counter
+import matplotlib.pyplot as plt
+
+# Read the CSV file
+df = pd.read_csv('xhamster.csv')
+
+# Convert the 'upload_date' column to datetime
+df['upload_date'] = pd.to_datetime(df['upload_date'])
+
+# Determine the latest year in the dataset
+latest_year = int(df['upload_date'].dt.year.max())
+
+# Filter data for the latest year
+df_latest_year = df[df['upload_date'].dt.year == latest_year]
+
+# Get the total number of views for the latest year
+total_views_latest_year = df_latest_year['nb_views'].sum()
+
+# Parse every 'channels' cell (a list literal stored as text) exactly once.
+# ast.literal_eval accepts only Python literals, so unlike eval() a crafted
+# cell in the CSV cannot run arbitrary code.
+tags_per_video = df_latest_year['channels'].apply(ast.literal_eval)
+
+# For each tag, sum the views of every video that carries it
+tag_views_latest_year = Counter()
+for tags, views in zip(tags_per_video, df_latest_year['nb_views']):
+    for tag in tags:
+        tag_views_latest_year[tag] += views
+
+# Rank the tags by total views, highest first, and keep the ten largest
+tag_views_latest_year = pd.Series(dict(tag_views_latest_year)).sort_values(ascending=False)
+top_tags_latest_year_raw = tag_views_latest_year.nlargest(10)
+
+# Print distribution of views among the top tags in the latest year
+print(f"Top 10 tags in {latest_year}:")
+top_tags = []
+top_views = []
+
+# Iterate over (tag, views) pairs in the top tags Series
+for tag, views in top_tags_latest_year_raw.items():
+    percentage = (views / total_views_latest_year) * 100
+    print(f"{tag}: {views} views ({percentage:.2f}% of total)")
+    top_tags.append(tag)
+    top_views.append(percentage)
+
+# 'Other' = videos with none of the top tags. Membership is tested against the
+# parsed tag lists: matching the raw text with str.contains() would treat each
+# tag as a regular expression and as a substring, so a top tag would also
+# "match" any longer tag that merely contains it.
+top_tag_set = set(top_tags_latest_year_raw.index)
+has_top_tag = tags_per_video.apply(lambda tags: not top_tag_set.isdisjoint(tags)).astype(bool)
+other_views = df_latest_year.loc[~has_top_tag, 'nb_views'].sum()
+
+top_tags.append('Other')
+percentage_other = (other_views / total_views_latest_year) * 100
+top_views.append(percentage_other)
+print(f"Other: {other_views} views ({percentage_other:.2f}% of total)")
+
+# Plot distribution of views among the top tags in the latest year
+plt.figure(figsize=(10, 6))
+plt.bar(top_tags, top_views)
+plt.xlabel('Tags')
+plt.ylabel('Number of Views (%)')
+plt.title(f'Distribution of Views Among Top Tags in {latest_year}')
+plt.xticks(rotation=45)
+plt.tight_layout()
+plt.show()
diff --git a/xhamster_views_over_years_lineplot.py b/xhamster_views_over_years_lineplot.py
new file mode 100644
index 0000000..1662330
--- /dev/null
+++ b/xhamster_views_over_years_lineplot.py
@@ -0,0 +1,51 @@
+"""Line plot of total views per upload month, one line per upload year.
+
+Reads 'xhamster.csv' (columns used: 'upload_date', 'nb_views'), drops rows
+whose 'upload_date' cannot be parsed as a date, sums 'nb_views' per
+(year, month) of upload and plots the months on the x axis.
+"""
+import pandas as pd
+import matplotlib.pyplot as plt
+import calendar
+import os
+
+# Read the CSV file
+df = pd.read_csv('xhamster.csv')
+
+# Parse 'upload_date' once; values that are not dates become NaT and their
+# rows are dropped. assign() returns a new frame, so the parsed column is never
+# written into a filtered slice of the original.
+upload_dates = pd.to_datetime(df['upload_date'], errors='coerce')
+df = df.assign(upload_date=upload_dates).dropna(subset=['upload_date'])
+
+# Group the data by year and month and calculate the total views for each month
+upload_year = df['upload_date'].dt.year.rename('Year')
+upload_month = df['upload_date'].dt.month.rename('Month')
+monthly_views = df.groupby([upload_year, upload_month])['nb_views'].sum().rename('Total Views').reset_index()
+
+# Create a pivot table to have months as columns and years as rows
+pivot_monthly_views = monthly_views.pivot(index='Year', columns='Month', values='Total Views')
+
+# Plot the total views for each month, with each year represented as a different colored line
+plt.figure(figsize=(12, 8))
+for year in pivot_monthly_views.index:
+    plt.plot(pivot_monthly_views.columns, pivot_monthly_views.loc[year], label=str(year))
+
+plt.xlabel('Month')
+plt.ylabel('Total Views')
+plt.title('Total Views of Each Month Across Years')
+plt.legend()
+plt.grid(True)
+plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
+
+# # Specify the directory path
+# save_dir = os.path.join('..', 'reports')  # Goes one level above and into 'reports' directory
+
+# # Create the directory if it doesn't exist
+# os.makedirs(save_dir, exist_ok=True)
+
+# # Save the plot in the specified directory
+# plt.savefig(os.path.join(save_dir, 'xhamster_views_over_years_lineplot.png'))
+
+plt.show()
\ No newline at end of file