app.py
import logging

import pandas as pd
from sklearn.model_selection import train_test_split

from src.TextClassifier import TextClassifier

# Configure basic logging for this application runner.
# This is separate from the test logging and shows how you might log application flow.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
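
# NOTE: the TextClassifier interface is assumed from its use below:
# train(texts, labels), evaluate(texts, labels) -> accuracy as a float,
# and predict(texts) -> a list of string labels.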


def main() -> None:
    """
    Demonstrates a simple text classification application flow.

    Initialises, trains, and uses the TextClassifier to make predictions.
    """
    logging.info("Starting the text classification application.")

    # 1. Initialise the TextClassifier
    classifier = TextClassifier()
    logging.info("TextClassifier instance created.")

    # 2. Prepare some training data from the CSV
    try:
        training_df = pd.read_csv("./data/raw/text-label.csv")
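        # The CSV is expected to provide "text" and "label" columns, which are
        # extracted below.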
        training_texts = training_df["text"].tolist()
        training_labels = training_df["label"].tolist()
        logging.info(f"Loaded {len(training_texts)} training samples from CSV.")

        # Split the data into training and test sets
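        # (stratify preserves the label proportions in both splits, and
        # random_state makes the split reproducible).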
        X_train, X_test, y_train, y_test = train_test_split(
            training_texts,
            training_labels,
            test_size=0.2,
            random_state=42,
            stratify=training_labels,
        )
        logging.info(
            f"Data split: {len(X_train)} training samples, {len(X_test)} test samples."
        )
    except Exception as e:
        logging.error(f"Error loading training data from CSV: {e}", exc_info=True)
        return  # Exit if data loading fails

    # 3. Train the model
    try:
        classifier.train(X_train, y_train)
        logging.info("Model training successful.")
    except Exception as e:
        logging.error(f"Error during model training: {e}", exc_info=True)
        return

    # 4. Evaluate the model using the test split
    try:
        accuracy = classifier.evaluate(X_test, y_test)
        print("\n--- Model Evaluation on Test Set ---")
        print(f"Accuracy: {accuracy:.2f}")
        print("------------------------------------")
        logging.info(f"Model evaluated on test set with accuracy: {accuracy:.2f}")
    except Exception as e:
        logging.warning(f"Could not perform evaluation on test set: {e}")

    # 5. Prepare some new, genuinely unseen data for prediction (distinct from the test set)
    new_texts_for_prediction = [
        "This is an absolutely fantastic product, highly recommended!",
        "I am extremely disappointed with the quality.",
        "It's an average item, nothing special but not bad.",
        "What a great product, I will buy 2 more!",
        "This was a terrible investment, I regret it.",
    ]
    logging.info(
        f"Prepared {len(new_texts_for_prediction)} new samples for prediction."
    )

    # 6. Make predictions on new data
    try:
        predictions = classifier.predict(new_texts_for_prediction)
        logging.info("Predictions generated successfully on new data.")
    except Exception as e:
        logging.error(f"Error during prediction on new data: {e}", exc_info=True)
        return  # Exit if prediction fails

    # 7. Display the prediction results
    print("\n--- Predictions on New Unseen Data ---")
    for i, (text, prediction) in enumerate(zip(new_texts_for_prediction, predictions)):
        print(f"Text {i + 1}: '{text}' -> Predicted Sentiment: {prediction.upper()}")
    print("--------------------------------------")

    logging.info("Text classification application finished.")
if __name__ == "__main__":
    main()