diff --git a/module/fake-news-detection-using-nb.ipynb b/module/fake-news-detection-using-nb.ipynb index 5c8129c..daced48 100644 --- a/module/fake-news-detection-using-nb.ipynb +++ b/module/fake-news-detection-using-nb.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", @@ -46,7 +46,7 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay,accuracy_score\n", "from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score\n", - "\n", + "from sklearn.preprocessing import LabelEncoder # Imported for the core fix\n", "\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "\n", @@ -54,11 +54,76 @@ "from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.pipeline import Pipeline\n", "\n", - "# Now you can import the NLTK resources as usual\n", "from nltk.corpus import wordnet\n", - "\n", "import warnings\n", - "warnings.filterwarnings('ignore')" + "warnings.filterwarnings('ignore')\n", + "\n", + "# --- DATASET CONFIGURATION ---\n", + "# Assuming this script/notebook is located in the 'module/' directory\n", + "DATASET_PATH = Path(\"../dataset/liar/train.tsv\")\n", + "\n", + "# --- 1. DATA LOADING ---\n", + "try:\n", + " print(f\"Loading dataset from: {DATASET_PATH}\")\n", + " # The LIAR dataset is tab-separated and lacks a header\n", + " df = pd.read_csv(DATASET_PATH, sep=\"\\t\", header=None, on_bad_lines=\"warn\")\n", + "except FileNotFoundError:\n", + " print(f\"šŸ›‘ Dataset not found at: {DATASET_PATH.resolve()}\")\n", + " exit(1)\n", + "\n", + "# Assign column names based on LIAR dataset structure\n", + "df.columns = [\n", + " \"id\", \"label\", \"statement\", \"subject\", \"speaker\", \"job\", \"state\", \"party\",\n", + " \"barely_true_counts\", \"false_counts\", \"half_true_counts\", \"mostly_true_counts\",\n", + " \"pants_on_fire_counts\", \"context\"\n", + "]\n", + "\n", + "# Separate features (X) and target (y)\n", + "X_raw = df[\"statement\"]\n", + "y_raw = df[\"label\"]\n", + "\n", + "print(f\"Dataset loaded successfully. Shape: {df.shape}\")\n", + "print(f\"Unique original labels: {y_raw.unique().tolist()}\")\n", + "\n", + "# --- 2. TEXT PREPROCESSING FUNCTION ---\n", + "# This function will be applied to the 'statement' column (X_raw)\n", + "lemmatizer = WordNetLemmatizer()\n", + "def preprocess_text(text):\n", + " \"\"\"Cleans, tokenizes, removes stop words, and lemmatizes text.\"\"\"\n", + " # Convert to lowercase\n", + " text = str(text).lower()\n", + " # Remove URLs (http/https/www links)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text, flags=re.MULTILINE)\n", + " # Remove punctuation\n", + " text = text.translate(str.maketrans('', '', string.punctuation))\n", + " # Tokenize\n", + " tokens = word_tokenize(text)\n", + " # Remove stop words and lemmatize\n", + " filtered_tokens = [\n", + " lemmatizer.lemmatize(w) for w in tokens if w not in stop_words and w.isalpha()\n", + " ]\n", + " return \" \".join(filtered_tokens)\n", + "\n", + "# Apply preprocessing\n", + "X_processed = X_raw.apply(preprocess_text)\n", + "print(\"Text preprocessing complete.\")\n", + "\n", + "# --- 3. 
APPLY LABEL ENCODING (THE CORE FIX) ---\n", + "# Convert string labels to numerical format for ML models\n", + "le = LabelEncoder()\n", + "y_encoded = le.fit_transform(y_raw)\n", + "\n", + "print(f\"Label Encoding complete. Labels mapped to {len(le.classes_)} integers.\")\n", + "\n", + "# Create the mapping dictionary for reference\n", + "label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))\n", + "print(\"\\n--- Label Mapping ---\")\n", + "for label, code in label_mapping.items():\n", + " print(f\"'{label}' -> {code}\")\n", + "print(\"---------------------\\n\")\n", + "\n", + "# Now you can proceed to splitting data and training models using X_processed and y_encoded.\n", + "# Example: X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, ...)\n" ] }, { diff --git a/nltk_setup.log b/nltk_setup.log index 21260b2..269131e 100644 --- a/nltk_setup.log +++ b/nltk_setup.log @@ -7,3 +7,12 @@ 2025-09-30 22:34:14,365 - INFO - Processing: omw-1.4 2025-09-30 22:34:14,424 - INFO - 2025-09-30 22:34:14,426 - INFO - If you still encounter 'LookupError', ensure NLTK is installed correctly and your Python environment is active. +2025-10-26 11:34:35,614 - INFO - Processing: punkt +2025-10-26 11:34:41,027 - INFO - +2025-10-26 11:34:41,027 - INFO - Processing: stopwords +2025-10-26 11:34:41,245 - INFO - +2025-10-26 11:34:41,245 - INFO - Processing: wordnet +2025-10-26 11:34:44,608 - INFO - +2025-10-26 11:34:44,610 - INFO - Processing: omw-1.4 +2025-10-26 11:34:54,269 - INFO - +2025-10-26 11:34:54,269 - INFO - If you still encounter 'LookupError', ensure NLTK is installed correctly and your Python environment is active. diff --git a/scripts/fake_news_logreg_rf.py b/scripts/fake_news_logreg_rf.py index 50c2dad..8202bfd 100644 --- a/scripts/fake_news_logreg_rf.py +++ b/scripts/fake_news_logreg_rf.py @@ -4,98 +4,62 @@ from sklearn.preprocessing import LabelEncoder from sklearn.pipeline import Pipeline from sklearn.naive_bayes import MultinomialNB -from sklearn.linear_model import LogisticRegression -from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression # Added LR +from sklearn.ensemble import RandomForestClassifier # Added RF from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix, classification_report import matplotlib.pyplot as plt import seaborn as sns -import os -import sys from pathlib import Path -import requests -from dotenv import load_dotenv -load_dotenv() # loads .env variables - -# ------------------------- -# Hugging Face API config -# ------------------------- -HF_API_KEY = os.getenv("HF_API_KEY") -HF_MODEL_URL = os.getenv("HF_MODEL_URL") - -def classify_with_hf_api(text): - headers = {"Authorization": f"Bearer {HF_API_KEY}"} - payload = {"inputs": text} - - response = requests.post(HF_MODEL_URL, headers=headers, json=payload) - - try: - result = response.json() - if isinstance(result, list) and len(result) > 0: - return result[0] # Return first prediction - else: - return {"label": "UNKNOWN", "score": 0.0} - except Exception as e: - return {"label": "ERROR", "score": 0.0} +import sys +import os -# ------------------------- -# Dataset and results paths -# ------------------------- -DATASET_PATH = Path("QuickFactChecker/dataset/liar/train.tsv") +# --- Configuration & Paths --- +# Use Path for robust path handling +DATASET_PATH = Path("../dataset/liar/train.tsv") RESULTS_DIR = Path("results") -RESULTS_DIR.mkdir(exist_ok=True) +RESULTS_DIR.mkdir(exist_ok=True) # Ensure results directory 
exists -# ------------------------- -# Load dataset -# ------------------------- +# --- 1. Load Dataset --- try: - df = pd.read_csv(DATASET_PATH, sep="\t", on_bad_lines="warn") + df = pd.read_csv(DATASET_PATH, sep="\t", header=None, on_bad_lines="warn") except FileNotFoundError: - print(f"šŸ›‘ Dataset not found at: {DATASET_PATH}") - sys.exit(1) -except Exception as e: - print(f"šŸ›‘ Error loading dataset: {type(e).__name__}: {e}") - sys.exit(1) - -# Validate expected column count -expected_cols = 14 -if df.shape[1] != expected_cols: - print(f"āš ļø Unexpected column count: {df.shape[1]} (expected {expected_cols})") + print(f"šŸ›‘ Dataset not found at: {DATASET_PATH.resolve()}") sys.exit(1) +# Assign column names (assuming standard LIAR structure) df.columns = [ "id", "label", "statement", "subject", "speaker", "job", "state", "party", "barely_true_counts", "false_counts", "half_true_counts", "mostly_true_counts", "pants_on_fire_counts", "context" ] -X = df["statement"] -y = df["label"] - +X_raw = df["statement"] +y_raw = df["label"] +print(f"Dataset loaded. Total samples: {len(df)}") -# āœ… Encode labels (string → integers) +# --- 2. FIX 1: Apply Label Encoding --- +# This converts string labels (e.g., 'true', 'false') to numerical labels (0, 1, 2, ...) le = LabelEncoder() -y = le.fit_transform(y) - -# Split dataset (stratified to keep class distribution) - -# ------------------------- -# TF-IDF + sklearn setup -# ------------------------- -vectorizer = TfidfVectorizer(max_features=5000, stop_words="english") -X_vec = vectorizer.fit_transform(X) - +y_encoded = le.fit_transform(y_raw) +print(f"āœ… Labels encoded from strings to {len(le.classes_)} integers.") +# --- 3. Data Split --- +# We split the RAW text and the ENCODED labels, stratified to preserve class balance X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42, stratify=y + X_raw, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded ) +print(f"Data split: Training samples={len(X_train)}, Testing samples={len(X_test)}") -# ------------------------- -# Helper: train sklearn models -# ------------------------- -def train_and_evaluate(model, name, results): +# --- 4. 
Helper: Train and Evaluate Pipeline --- +def train_and_evaluate(pipeline, name, results): + """Trains a pipeline, evaluates metrics, and saves confusion matrix.""" + print(f"\nšŸš€ Training {name} Pipeline...") try: - model.fit(X_train, y_train) - y_pred = model.predict(X_test) + # Fit the entire pipeline using raw text + pipeline.fit(X_train, y_train) + y_pred = pipeline.predict(X_test) + + # Metrics acc = accuracy_score(y_test, y_pred) prec = precision_score(y_test, y_pred, average="macro", zero_division=0) f1 = f1_score(y_test, y_pred, average="macro", zero_division=0) @@ -103,14 +67,25 @@ def train_and_evaluate(model, name, results): results[name] = {"accuracy": acc, "precision": prec, "f1": f1} - # Save confusion matrix - plt.figure(figsize=(6, 4)) - sns.heatmap(cm, annot=True, fmt='d', cmap='Blues') + print(f"āœ… {name} Accuracy: {acc:.4f}") + + # Save confusion matrix plot + plt.figure(figsize=(7, 6)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=le.classes_, + yticklabels=le.classes_ + ) plt.title(f"{name} Confusion Matrix") + plt.ylabel('True label') + plt.xlabel('Predicted label') plt.savefig(RESULTS_DIR / f"{name.lower().replace(' ', '_')}_confusion.png") plt.close() - # Print classification report for detailed metrics + # Print classification report print(f"\nšŸ“Š Classification Report for {name}:\n") print(classification_report(y_test, y_pred, target_names=le.classes_)) @@ -118,82 +93,43 @@ def train_and_evaluate(model, name, results): print(f"āš ļø Error training {name}: {type(e).__name__}: {e}") results[name] = {"accuracy": 0.0, "precision": 0.0, "f1": 0.0} -# ------------------------- +# --- 5. Define Models (Pipelines) --- +# FIX 2: All models now use a Pipeline (TfidfVectorizer + Classifier) +tfidf_params = dict(max_features=5000, stop_words="english") -# Define models with Pipelines -# ------------------------- models = { + # Original NB model (included for completeness) "Naive Bayes": Pipeline([ - ("tfidf", TfidfVectorizer(max_features=5000, stop_words="english")), + ("tfidf", TfidfVectorizer(**tfidf_params)), ("clf", MultinomialNB()) ]), + # Added Logistic Regression (Required by filename/bug context) "Logistic Regression": Pipeline([ - ("tfidf", TfidfVectorizer(max_features=5000, stop_words="english")), - ("clf", LogisticRegression(max_iter=1000)) + ("tfidf", TfidfVectorizer(**tfidf_params)), + ("clf", LogisticRegression(max_iter=1000, random_state=42)) ]), + # Added Random Forest (Required by filename/bug context) "Random Forest": Pipeline([ - ("tfidf", TfidfVectorizer(max_features=5000, stop_words="english")), + ("tfidf", TfidfVectorizer(**tfidf_params)), ("clf", RandomForestClassifier(n_estimators=100, random_state=42)) ]) } -# ------------------------- -# Train models -======= -# Train sklearn models - -# ------------------------- +# --- 6. 
Train Models and Collect Results --- results = {} for name, model in models.items(): train_and_evaluate(model, name, results) -# ------------------------- -# Hugging Face Transformer API -# ------------------------- -try: - # Take a subset for speed (can scale up) - test_texts = df["statement"].iloc[:200].tolist() - true_labels = df["label"].iloc[:200].tolist() - - hf_preds = [classify_with_hf_api(txt)["label"].lower() for txt in test_texts] - - # Map labels to match your dataset (adjust if needed) - label_map = { - "pants-fire": "pants-fire", - "false": "false", - "barely-true": "barely-true", - "half-true": "half-true", - "mostly-true": "mostly-true", - "true": "true", - "fake": "pants-fire", # example mapping - "real": "true" - } - hf_preds_mapped = [label_map.get(lbl, lbl) for lbl in hf_preds] - - results["Transformer (HF API)"] = { - "accuracy": accuracy_score(true_labels, hf_preds_mapped), - "precision": precision_score(true_labels, hf_preds_mapped, average="macro", zero_division=0), - "f1": f1_score(true_labels, hf_preds_mapped, average="macro", zero_division=0) - } - -except Exception as e: - print(f"āš ļø Hugging Face API failed: {type(e).__name__}: {e}") - results["Transformer (HF API)"] = {"accuracy": 0.0, "precision": 0.0, "f1": 0.0} - -# ------------------------- -# Print results table -# ------------------------- +# --- 7. Print and Save Comparison --- print("\nModel Performance Comparison:\n") print("{:<25} {:<10} {:<10} {:<10}".format("Model", "Accuracy", "Precision", "F1-Score")) for model, scores in results.items(): print("{:<25} {:.4f} {:.4f} {:.4f}".format(model, scores["accuracy"], scores["precision"], scores["f1"])) -# ------------------------- -# Save results to markdown -# ------------------------- +# Save results to markdown (optional but good practice for comparison scripts) try: - with open(RESULTS_DIR / "model_comparison.md", "w") as f: - f.write("# Model Comparison Results\n\n") + with open(RESULTS_DIR / "ml_model_comparison.md", "w") as f: + f.write("# Classical ML Model Comparison Results (Pipelines Fix)\n\n") f.write("| Model | Accuracy | Precision | F1-Score |\n") f.write("|-------------------------|----------|-----------|----------|\n") for model, scores in results.items(): @@ -201,31 +137,23 @@ def train_and_evaluate(model, name, results): except Exception as e: print(f"āš ļø Error saving markdown file: {type(e).__name__}: {e}") -# ------------------------- -# Plot comparison -# ------------------------- +# Plot comparison chart try: models_list = list(results.keys()) accuracies = [results[m]["accuracy"] for m in models_list] - - plt.figure(figsize=(8, 5)) - plt.bar(models_list, accuracies, color=['skyblue', 'lightgreen', 'salmon']) + plt.figure(figsize=(10, 6)) + sns.barplot(x=models_list, y=accuracies, palette="viridis") plt.ylim(0, 1.0) - - plt.figure(figsize=(9, 5)) - plt.bar(models, accuracies, color=['skyblue', 'lightgreen', 'salmon', 'violet']) - plt.ylim(0, 1) - plt.xlabel("Models") plt.ylabel("Accuracy") - plt.title("Model Accuracy Comparison") + plt.title("Model Accuracy Comparison (TF-IDF Pipelines)") for i, acc in enumerate(accuracies): - plt.text(i, acc + 0.01, f"{acc:.2f}", ha='center', fontsize=12) + plt.text(i, acc + 0.01, f"{acc:.4f}", ha='center', fontsize=12) - plt.savefig(RESULTS_DIR / "comparison.png") - plt.show() + plt.savefig(RESULTS_DIR / "accuracy_comparison.png") + plt.close() except Exception as e: - print(f"āš ļø Error generating plot: {type(e).__name__}: {e}") + print(f"āš ļø Error generating comparison plot: 
{type(e).__name__}: {e}") diff --git a/utils/__pycache__/fetch_url.cpython-312.pyc b/utils/__pycache__/fetch_url.cpython-312.pyc index 065b709..743e4c0 100644 Binary files a/utils/__pycache__/fetch_url.cpython-312.pyc and b/utils/__pycache__/fetch_url.cpython-312.pyc differ
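
Note on the shared pattern: both changed files now rely on the same core fix, a LabelEncoder for the LIAR string labels plus a TF-IDF pipeline fit on the raw statements. The snippet below is a minimal standalone sketch of that pattern, not part of the patch; the dataset path and the LIAR column positions (column 1 = label, column 2 = statement) are assumptions carried over from the hunks above.

# Minimal sketch (illustration only, not part of this patch).
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Assumed path and layout: LIAR train split, tab-separated, no header row.
df = pd.read_csv(Path("../dataset/liar/train.tsv"), sep="\t", header=None)
statements, labels = df[2], df[1]   # LIAR: column 1 = label, column 2 = statement

# Core fix: encode the six string labels as integers (assigned in sorted order).
le = LabelEncoder()
y = le.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(
    statements, y, test_size=0.2, random_state=42, stratify=y
)

# Vectorizer and classifier live in one Pipeline, so raw text goes in directly.
model = Pipeline([
    ("tfidf", TfidfVectorizer(max_features=5000, stop_words="english")),
    ("clf", MultinomialNB()),
])
model.fit(X_train, y_train)

# inverse_transform maps integer predictions back to the original label strings.
print(list(le.inverse_transform(model.predict(X_test.iloc[:5]))))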