-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathml10
More file actions
73 lines (50 loc) · 2.62 KB
/
ml10
File metadata and controls
73 lines (50 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Requires the mlxtend package: pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
# Toy market-basket data: four transactions, each a list of purchased items.
baskets = [
    ['Milk', 'Bread', 'Butter'],
    ['Beer', 'Diapers', 'Bread'],
    ['Milk', 'Bread', 'Butter', 'Beer'],
    ['Diapers', 'Milk', 'Bread'],
]
data = {
    'TransactionID': list(range(1, len(baskets) + 1)),
    'Items': baskets,
}
df = pd.DataFrame(data)

# One-hot encode the transactions: join every item list into a single
# '|'-delimited string, then let get_dummies() split it back into one
# indicator column per distinct item ('|' is get_dummies' default separator).
item_strings = df['Items'].str.join('|')
# Cast the 0/1 indicators to bool before handing them to apriori
# (the original author notes this avoids a warning).
encoded_data = item_strings.str.get_dummies().astype(bool)

# Mine itemsets that occur in at least 50% of the transactions.
frequent_itemsets = apriori(encoded_data, min_support=0.5, use_colnames=True)
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

# Keep only the rules whose confidence is at least 70%.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
print("\nAssociation Rules:", rules, sep="\n")
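# ---------------------------------------------------------------------------
# Second example: the same Apriori pipeline on transactions loaded from a CSV
# ---------------------------------------------------------------------------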
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
# Load your dataset
# Assuming a CSV file with 'TransactionID' and 'Items' columns where 'Items' is a list of items in each transaction
df = pd.read_csv('market_basket.csv') # Replace with your file path
# Preview the dataset
print("First few rows of the dataset:\n", df.head())
# Perform One-Hot Encoding to convert transactions into a binary matrix
# Assume 'Items' column contains a list of items purchased in each transaction
encoded_data = df['Items'].str.join('|').str.get_dummies()
# Convert the one-hot encoded DataFrame to boolean type
encoded_data = encoded_data.astype(bool)
# Apply the Apriori algorithm with a minimum support threshold
frequent_itemsets = apriori(encoded_data, min_support=0.05, use_colnames=True) # Adjust min_support as needed
# Display frequent itemsets
print("\nFrequent Itemsets:\n", frequent_itemsets)
# Generate association rules with a confidence threshold
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6) # Adjust min_threshold as needed
# Display association rules
print("\nAssociation Rules:\n", rules)
# Optional: Save the frequent itemsets and rules to CSV files for further analysis
frequent_itemsets.to_csv('frequent_itemsets.csv', index=False)
rules.to_csv('association_rules.csv', index=False)
# Print summary
print("\nFrequent itemsets and association rules have been saved to CSV files.")