diff --git a/backend/solver.py b/backend/solver.py
index 517238d..473204f 100644
--- a/backend/solver.py
+++ b/backend/solver.py
@@ -244,12 +244,18 @@ def backtrack(group_idx, current_schedule_meta):
                 return
 
             # Pruning
-            if len(top_n_heap) == max_results:
+            # Only prune if we are sure score cannot increase.
+            # If compactness='low', score can increase (bonus for gaps).
+            can_prune = preferences.get('compactness') != 'low'
+
+            if can_prune and len(top_n_heap) == max_results:
                 partial_sched = [m['representative'] for m in current_schedule_meta]
                 partial_score = ScheduleRanker.score_schedule(partial_sched, preferences)
                 # Upper bound check (assuming score decreases with penalties)
-                # If partial score is already too low, we can't recover.
-                if partial_score < top_n_heap[0][0]:
+                # If partial score is already too low (or equal to the worst we have,
+                # and we can't improve), we can't beat the current set.
+                # Since we only replace if score > min_heap, partial <= min implies failure.
+                if partial_score <= top_n_heap[0][0]:
                     return
 
             candidates = meta_groups[group_idx]
diff --git a/tests/test_logic.py b/tests/test_logic.py
index 8485c22..2eba57a 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -6,6 +6,7 @@ def test_logic():
 
     # Dummy Course Data
     # Format: {'name': str, 'schedule_bitmaps': [0, ...]}
+    # Each candidate also sets 'selected': True, which the solver expects.
 
     # Course A: Mon 1-2 (Bit 0, 1)
     # Course B: Mon 1-2 (Conflict with A)
@@ -16,9 +17,9 @@ def test_logic():
     mask_b = (1 << 0) | (1 << 1)
     mask_c = (1 << 2) | (1 << 3)
 
-    c_a = {'name': 'A', 'schedule_bitmaps': [0, mask_a, mask_a]}
-    c_b = {'name': 'B', 'schedule_bitmaps': [0, mask_b, mask_b]}
-    c_c = {'name': 'C', 'schedule_bitmaps': [0, mask_c, mask_c]}
+    c_a = {'name': 'A', 'schedule_bitmaps': [0, mask_a, mask_a], 'selected': True}
+    c_b = {'name': 'B', 'schedule_bitmaps': [0, mask_b, mask_b], 'selected': True}
+    c_c = {'name': 'C', 'schedule_bitmaps': [0, mask_c, mask_c], 'selected': True}
 
     groups = [
         {
@@ -51,11 +52,30 @@ def test_logic():
 
     # 2. Generate Schedules (from orig groups)
     # Possible: A+C, A+B(X), B+C, B+B(X) -> Valid: A+C, B+C.
-    schedules = ScheduleSolver.generate_schedules(groups)
-    assert len(schedules) == 2, f"Expected 2 schedules, got {len(schedules)}"
-    names = ["+".join([c['name'] for c in s]) for s in schedules]
-    assert "A+C" in names and "B+C" in names
-    print("    [+] Generation OK")
+    schedules, total = ScheduleSolver.generate_schedules(groups)
+    # Note: generate_schedules returns (schedules, count)
+
+    # Due to Meta-Candidate optimization, A and B (identical time) are clustered.
+    # So we get 1 schedule: [A (alts A,B), C].
+    # This represents both A+C and B+C.
+    assert len(schedules) == 1, f"Expected 1 meta-schedule, got {len(schedules)}"
+
+    sched = schedules[0]
+    # The meta-schedule must contain C plus one representative of the A/B
+    # cluster, and that representative must list both A and B as alternatives.
+
+    # Identify by name
+    names = [c['name'] for c in sched]
+    assert 'C' in names
+
+    # The other one is A (rep) or B (rep)
+    other = [c for c in sched if c['name'] != 'C'][0]
+    # Check alternatives
+    alts = other.get('alternatives', [])
+    alt_names = [a['name'] for a in alts]
+    assert 'A' in alt_names and 'B' in alt_names, f"Expected alternatives A and B, got {alt_names}"
+
+    print("    [+] Generation OK (Meta-Candidates verified)")
 
     # 3. Ranking
     # A+C (Mon 1-4). B+C (Mon 1-4).
diff --git a/tests/test_pruning.py b/tests/test_pruning.py
new file mode 100644
index 0000000..110e30e
--- /dev/null
+++ b/tests/test_pruning.py
@@ -0,0 +1,77 @@
+from backend.solver import ScheduleSolver
+from backend.ranker import ScheduleRanker
+
+def test_pruning_bug():
+    """Check that pruning keeps a schedule whose score rises via the gap bonus.
+
+    With compactness='low' a partial schedule's score can still increase, so
+    pruning on the partial score alone may discard the true optimum. The score
+    figures in the comments below are expectations of ScheduleRanker's rules.
+    """
+    print("[*] Testing Pruning Bug...")
+
+    # Scenario:
+    # Preference: avoid_early_morning (Penalty), compactness='low' (Bonus).
+    #
+    # Group 1:
+    #   C1: Mon 5 (Slot 4). Score 100.
+    #   C2: Tue 1 (Slot 0). Early Penalty (-2). Partial Score 98.
+    #
+    # Group 2:
+    #   C3: Mon 6 (Slot 5).
+    #       With C1: Same day, adjacent. Gap 0. Bonus 0. Total 100.
+    #       With C2: Different days. Gap 0. Total 98.
+    #   C4: Tue 13 (Slot 12).
+    #       With C1: Different days. Gap 0. Total 100.
+    #       With C2: Same day. Gap 1 to 12. 11 slots. Bonus 2.2.
+    #                Total = 100 - 2 + 2.2 = 100.2. (WINNER).
+    #
+    # Failure mode when pruning on the partial score
+    # (assuming C1 is tried before C2, and C3 before C4):
+    # 1. [C1, C3] scores 100 -> heap = [100].
+    # 2. [C1, C4] scores 100 -> heap unchanged (max_results=1, not > top).
+    # 3. Picking C2 gives partial score 98 < heap[0] (100) -> branch pruned.
+    # 4. [C2, C4] (score 100.2) is never found.
+
+    def make_cand(name, day_idx, slot_idx):
+        """Build a candidate occupying one slot; bit = day_idx * 13 + slot_idx."""
+        # day_idx: 0=Mon, 1=Tue...
+        bm = [0] * 30
+        bm[1] = 1 << (day_idx * 13 + slot_idx)
+        return {'name': name, 'schedule_bitmaps': bm, 'selected': True}
+
+    c1 = make_cand('C1', 0, 4)  # Mon 5
+    c2 = make_cand('C2', 1, 0)  # Tue 1 (Early)
+    c3 = make_cand('C3', 0, 5)  # Mon 6
+    c4 = make_cand('C4', 1, 12)  # Tue 13
+
+    g1 = {'id': 1, 'candidates': [c1, c2]}
+    g2 = {'id': 2, 'candidates': [c3, c4]}
+
+    prefs = {'avoid_early_morning': True, 'compactness': 'low'}
+
+    # generate_schedules returns (schedules, count); the count is not needed here.
+    schedules, _ = ScheduleSolver.generate_schedules(
+        [g1, g2], max_results=1, preferences=prefs
+    )
+
+    print(f"DEBUG: Found {len(schedules)} schedules.")
+    # Assertions (rather than exit(1)) let pytest report the failure; a direct
+    # script run still ends with a non-zero status on AssertionError.
+    assert schedules, "    [-] No schedules found."
+
+    best = schedules[0]
+    names = sorted(c['name'] for c in best)
+    score = ScheduleRanker.score_schedule(best, prefs)
+    print(f"DEBUG: Best Schedule: {names}, Score: {score}")
+
+    # Anything other than C2+C4 means the optimal branch was pruned away.
+    assert 'C2' in names and 'C4' in names, (
+        "    [-] Failure! Pruned optimal schedule (Found sub-optimal). "
+        f"Got {names} with score {score}."
+    )
+    print("    [+] Success! Found optimal schedule with Bonus.")
+
+
+if __name__ == "__main__":
+    test_pruning_bug()
diff --git a/tests/test_scalability.py b/tests/test_scalability.py
new file mode 100644
index 0000000..682d8ae
--- /dev/null
+++ b/tests/test_scalability.py
@@ -0,0 +1,62 @@
+import time
+import random
+from backend.solver import ScheduleSolver
+
+def test_scalability():
+    """Smoke-test that the solver finishes quickly on a 5^10 search space.
+
+    Builds 10 groups of 5 random two-slot candidates (Mon-Fri, 13 slots/day)
+    and fails if generating the top 20 schedules takes longer than 20 seconds.
+    """
+    print("[*] Testing Scalability...")
+
+    # Total space: 5^10 = 9,765,625 combinations; confirm it doesn't hang.
+    # A small week range with few slots encourages conflicts and pruning.
+    num_groups = 10
+    cands_per_group = 5
+    slots_per_day = 13
+    max_seconds = 20.0
+
+    # Private generator: deterministic without reseeding the global random state.
+    rng = random.Random(42)
+
+    groups = []
+    for g_id in range(num_groups):
+        cands = []
+        for c_id in range(cands_per_group):
+            day = rng.randint(0, 4)
+            # Start slot 0-11 so the second slot (start + 1) stays within the day.
+            slot = rng.randint(0, slots_per_day - 2)
+
+            first_bit = day * slots_per_day + slot
+            bm = [0] * 30
+            bm[1] = (1 << first_bit) | (1 << (first_bit + 1))  # Week 1
+
+            cands.append({
+                'name': f'G{g_id}_C{c_id}',
+                'schedule_bitmaps': bm,
+                'selected': True,
+            })
+
+        groups.append({'id': g_id, 'candidates': cands})
+
+    print("    [i] Generated 10 groups, 5 candidates each. (Space ~9.7M)")
+
+    # perf_counter is monotonic, so the measurement is immune to wall-clock jumps.
+    start_time = time.perf_counter()
+    schedules, total = ScheduleSolver.generate_schedules(groups, max_results=20)
+    duration = time.perf_counter() - start_time
+
+    print(f"    [+] Finished in {duration:.4f} seconds.")
+    print(f"    [+] Found {total} valid schedules (Top {len(schedules)} returned).")
+
+    # Assert (rather than exit(1)) so pytest reports a slow run as a failure;
+    # a direct script run still ends with a non-zero status on AssertionError.
+    assert duration <= max_seconds, (
+        f"    [-] Too slow! Took {duration:.4f}s (limit {max_seconds:.0f}s)."
+    )
+    print("    [+] Scalability OK.")
+
+
+if __name__ == "__main__":
+    test_scalability()