# params.yaml
---
# Diamond Price Predictor - Model Parameters and Configuration
# Data Ingestion Configuration
# Where the raw dataset lives, how it is split, and the schema it is
# validated against.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost; confirm that the validation keys are read from
# under `data_ingestion` by the consuming code.
data_ingestion:
  # NOTE(review): despite the key name, this value is a relative file path,
  # not a URL.
  source_url: "data/raw/sample_diamonds.csv"
  local_data_file: "data/raw/diamonds.csv"
  raw_data_path: "data/raw"
  # NOTE(review): the model trainer section also defines `test_size: 0.2`;
  # confirm which of the two settings actually drives the split.
  train_test_split_ratio: 0.2
  random_state: 42

  # Data validation schema: columns that must be present in the dataset.
  required_columns:
    - "carat"
    - "cut"
    - "color"
    - "clarity"
    - "depth"
    - "table"
    - "price"
    - "x"
    - "y"
    - "z"

  # Data type expectations, one entry per required column.
  # The names look like pandas/NumPy dtype strings - TODO confirm.
  column_types:
    carat: "float64"
    cut: "object"
    color: "object"
    clarity: "object"
    depth: "float64"
    table: "float64"
    price: "int64"
    x: "float64"
    y: "float64"
    z: "float64"

  # Data validation ranges for the numeric columns, as two-element lists.
  # Presumably [min, max]; whether the bounds are inclusive is not visible
  # here - verify against the validator.
  validation_ranges:
    carat: [0.2, 5.01]
    depth: [43.0, 79.0]
    table: [43.0, 95.0]
    price: [326, 18823]
    x: [0.0, 10.74]
    y: [0.0, 58.9]
    z: [0.0, 31.8]
# Model Training Configuration
# Model selection, performance targets, hyperparameter search spaces and
# evaluation metrics.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. Without it, the `xgboost` and `random_forest`
# entries collide on `n_estimators`, `max_depth` and `random_state`; confirm
# the layout against the code that reads this section.
model_trainer:
  model_name: "xgboost"
  test_size: 0.2
  random_state: 42

  # Performance targets
  target_accuracy: 0.95  # 95%+ R² score requirement
  max_training_time_minutes: 10
  cv_folds: 5

  # XGBoost hyperparameter search space for optimization.
  # Seven parameters with three candidates each give 3^7 = 2187 combinations.
  # NOTE(review): if the search is exhaustive and uses `cv_folds: 5`, that is
  # 10935 fits - check this against `max_training_time_minutes`.
  xgboost:
    n_estimators: [100, 150, 200]
    learning_rate: [0.01, 0.1, 0.2]
    max_depth: [3, 5, 7]
    subsample: [0.8, 0.9, 1.0]
    colsample_bytree: [0.8, 0.9, 1.0]
    reg_alpha: [0, 0.1, 0.5]
    reg_lambda: [1, 1.5, 2]
    # Single value rather than a candidate list.
    random_state: 42

  # Alternative models for comparison.
  # Four parameters with three candidates each give 3^4 = 81 combinations.
  random_forest:
    n_estimators: [100, 200, 300]
    max_depth: [10, 15, 20]
    min_samples_split: [2, 5, 10]
    min_samples_leaf: [1, 2, 4]
    # Single value rather than a candidate list.
    random_state: 42

  # Model evaluation metrics
  metrics:
    - "mean_absolute_error"
    - "mean_squared_error"
    - "root_mean_squared_error"
    - "r2_score"
    - "mean_absolute_percentage_error"
  # Must name one of the entries in `metrics` above.
  primary_metric: "r2_score"

  # Hyperparameter optimization
  optimization_method: "grid_search"  # grid_search, random_search
  n_jobs: -1  # Use all available cores
  verbose: 1
# Data Transformation Configuration
# Path of the preprocessor object file, relative like the other paths in
# this file.
# NOTE(review): nesting reconstructed from a copy with lost indentation;
# confirm the key is read from under `data_transformation`.
data_transformation:
  preprocessor_obj_file_path: "artifacts/preprocessor.pkl"
# Model Evaluation Configuration
# Path of the model file and the name of the column to predict.
# NOTE(review): nesting reconstructed from a copy with lost indentation;
# confirm both keys are read from under `model_evaluation`.
model_evaluation:
  model_path: "artifacts/model.pkl"
  # Matches the "price" entry in the required columns of the ingestion schema.
  target_column: "price"
# MLflow Configuration
# Experiment and run names plus the tracking server address.
# NOTE(review): nesting reconstructed from a copy with lost indentation;
# confirm the keys are read from under `mlflow`.
mlflow:
  experiment_name: "diamond_price_prediction"
  run_name: "xgboost_baseline"
  # Points at a server on the local machine, port 5000.
  # NOTE(review): this will need overriding for any non-local deployment.
  tracking_uri: "http://localhost:5000"
# Logging Configuration
# Log level, record layout and output file.
# NOTE(review): nesting reconstructed from a copy with lost indentation;
# confirm the keys are read from under `logging`.
logging:
  level: "INFO"
  # Presumably a Python `logging` %-style format string - TODO confirm.
  # Kept quoted so the `%(...)` and ` - ` sequences stay one plain string.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "logs/diamond_predictor.log"