Description
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
# Step 1: Load or simulate a dataset
# Using mock data here (replace with a real dataset if needed)
def create_mock_data():
    np.random.seed(42)
    n = 1000
    data = {
        'transaction_amount': np.random.exponential(scale=100, size=n),
        'transaction_time': np.random.randint(0, 24, size=n),
        'account_age_days': np.random.randint(1, 1000, size=n),
        'is_foreign_transaction': np.random.randint(0, 2, size=n),
        'is_high_risk_country': np.random.randint(0, 2, size=n),
        'is_fraud': np.random.choice([0, 1], size=n, p=[0.95, 0.05])
    }
    return pd.DataFrame(data)
# Step 2: Prepare the data
df = create_mock_data()
X = df.drop('is_fraud', axis=1)
y = df['is_fraud']
# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
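# Optional (not in the original script): with only ~5% fraud labels, passing
# stratify=y keeps the class ratio comparable in the train and test sets.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, stratify=y)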
# Step 4: Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
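# Optional alternative (not in the original script): class_weight='balanced'
# makes the forest weight the rare fraud class more heavily during training.
# model = RandomForestClassifier(n_estimators=100, random_state=42,
#                                class_weight='balanced')
# model.fit(X_train, y_train)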
# Step 5: Make predictions
y_pred = model.predict(X_test)
# Step 6: Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
# Step 7: Feature importance visualization
feature_importances = pd.Series(model.feature_importances_, index=X.columns)
feature_importances.sort_values().plot(kind='barh')
plt.title('Feature Importance')
plt.tight_layout()
plt.show()
Sample output:

Confusion Matrix:
[[189   1]
 [  6   4]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       190
           1       0.80      0.40      0.53        10

Accuracy: 0.965
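As an optional extension (not part of the original script or its output), the otherwise unused seaborn import can render the confusion matrix as a heatmap; this sketch assumes the y_test and y_pred variables from the script above:

# Render the confusion matrix as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Fraud', 'Fraud'],
            yticklabels=['Not Fraud', 'Fraud'])
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()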