diff --git a/autograder/autograder.py b/autograder/autograder.py
new file mode 100644
index 0000000..b6f2fb3
--- /dev/null
+++ b/autograder/autograder.py
@@ -0,0 +1,57 @@
+from autograder.services.report.reporter_service import ReporterService
+from autograder.services.upstash_driver import UpstashDriver
+from autograder.pipeline import AutograderPipeline
+from autograder.steps.export_step import ExporterStep
+from autograder.steps.feedback_step import FeedbackStep
+from autograder.steps.grade_step import GradeStep
+from autograder.steps.load_template_step import TemplateLoaderStep
+from autograder.steps.pre_flight_step import PreFlightStep
+from autograder.steps.build_tree_step import BuildTreeStep
+
+
+def build_pipeline(
+        template_name,
+        include_feedback,
+        grading_criteria,
+        feedback_config,
+        setup_config = None,
+        custom_template = None,
+        feedback_mode = None):
+    """
+    Build an autograder pipeline based on configuration.
+
+    Args:
+        template_name: Name of the template to use
+        include_feedback: Whether to include feedback generation
+        grading_criteria: Criteria configuration dictionary
+        feedback_config: Configuration for feedback generation
+        setup_config: Pre-flight setup configuration
+        custom_template: Custom template object (if any)
+        feedback_mode: Mode for feedback generation
+    Returns:
+        Configured AutograderPipeline
+    """
+    pipeline = AutograderPipeline()
+
+    # Load template
+    pipeline.add_step(TemplateLoaderStep(template_name, custom_template))  # Passes the template to the next step
+
+    pipeline.add_step(BuildTreeStep(grading_criteria))  # Uses template to match selected tests in criteria and builds tree
+
+    # Pre-flight checks (if configured)
+    if setup_config:
+        pipeline.add_step(PreFlightStep(setup_config))
+
+    pipeline.add_step(GradeStep())  # Generates GradingResult with final score and result tree
+
+    # Feedback generation (if configured)
+    if include_feedback:
+        reporter_service = ReporterService(feedback_mode=feedback_mode)
+        pipeline.add_step(FeedbackStep(reporter_service, feedback_config))  # Uses GradingResult to generate feedback and appends it to GradingResult
+
+    # Export results
+    pipeline.add_step(ExporterStep(UpstashDriver))  # Exports final results and feedback
+
+    return pipeline
+
+
diff --git a/autograder/autograder_facade.py b/autograder/autograder_facade.py
deleted file mode 100644
index 5a9f666..0000000
--- a/autograder/autograder_facade.py
+++ /dev/null
@@ -1,415 +0,0 @@
-import logging
-
-from autograder.builder.models.template import Template
-from autograder.context import request_context
-from autograder.core.grading.grader import Grader
-from autograder.core.models.autograder_response import AutograderResponse
-from autograder.core.models.feedback_preferences import FeedbackPreferences
-from autograder.core.models.result import Result
-from autograder.core.report.reporter_factory import Reporter
-from autograder.core.utils.upstash_driver import Driver
-from connectors.models.assignment_config import AssignmentConfig
-from connectors.models.autograder_request import AutograderRequest
-from autograder.builder.tree_builder import CriteriaTree
-from autograder.builder.template_library.library import TemplateLibrary
-
-
-from autograder.builder.pre_flight import PreFlight
-
-logger = logging.getLogger(__name__)
-
-class Autograder:
-
-    # Static member that's accessible by all methods
-    selected_template : Template = None
-    feedback_preferences: FeedbackPreferences = None
-
-    @staticmethod
-    def grade(autograder_request: AutograderRequest):
-        
logger.info("Starting autograder process") - - # Set the request in the global context at the beginning of the process - request_context.set_request(autograder_request) - if autograder_request.openai_key: - logger.info("OpenAI key provided, AI feedback mode may be used") - logger.info("Setting environment variable for OpenAI key") - import os - os.environ["OPENAI_API_KEY"] = autograder_request.openai_key - try: - - # Step 1: Handle Pre-flight checks if setup is defined - if autograder_request.assignment_config.setup: - Autograder._pre_flight_step() - - # Step 2: Get test template - logger.info("Importing test template") - Autograder._import_template_step() - - # Step 3: Build criteria tree - logger.info("Building criteria tree from assignment configuration:") - Autograder._build_criteria_step() - - # Step 4: Initialize and run grader - logger.info("Starting grading process") - result = Autograder._start_and_run_grader() - logger.info(f"Grading completed. Final score: {result.final_score}") - - if autograder_request.redis_token and autograder_request.redis_url: - Autograder.export_final_score(result.final_score) - - if autograder_request.include_feedback: - # Step 5: Setup feedback preferences - logger.info("Processing feedback preferences") - Autograder._setup_feedback_pref() - logger.debug(f"Feedback mode: {autograder_request.feedback_mode}") - - # Step 6: Create reporter based on feedback mode - Autograder.create_feedback_report(result) - - # Step 7: Generate feedback - logger.info("Generating feedback report") - feedback_report = Autograder._generate_feedback() - logger.info("Feedback report generated successfully") - - - # Step 8: Create and return the successful response - logger.info("Creating successful autograder response") - response = AutograderResponse( - status = "Success", - final_score = result.final_score, - feedback = feedback_report, - test_report = result.get_test_report() - ) - logger.info("Autograder process completed successfully") - return response - else: - logger.info("Feedback not requested, returning score only") - return AutograderResponse( - status="Success", - final_score=result.final_score, - feedback="", - test_report=result.get_test_report() - ) - - - except Exception as e: - # Catch any exception, log it, and return a failure response - error_message = f"An unexpected error occurred during the grading process: {str(e)}" - logger.error(error_message) - logger.exception("Full exception traceback:") - return AutograderResponse(status="fail", final_score=0.0, feedback=error_message, test_report=[]) - - @staticmethod - def _pre_flight_step(): - - if request_context.get_request() and request_context.get_request().assignment_config.setup: - logger.info("Running pre-flight setup commands") - impediments = PreFlight.run() - if impediments: - error_messages = [impediment['message'] for impediment in impediments] - error_text = "\n".join(error_messages) - logger.error(f"Pre-flight checks failed with errors: {error_messages}") - raise RuntimeError(error_text) - - logger.info("Pre-flight setup completed with no impediments") - - - - @staticmethod - def _import_template_step(): - req = request_context.get_request() - template_name = req.assignment_config.template - if template_name == "custom": - logger.info(f"Loading custom test template provided!") - test_template = TemplateLibrary.get_template(template_name,req.assignment_config.custom_template_str) - else: - logger.info(f"Loading test template: '{template_name}'") - test_template = 
TemplateLibrary.get_template(template_name) - if test_template is None: - logger.error(f"Template '{template_name}' not found in TemplateLibrary") - raise ValueError(f"Unsupported template: {template_name}") - - logger.info(f"Test template '{test_template.template_name}' instantiated successfully") - Autograder.selected_template = test_template - - - @staticmethod - def _build_criteria_step(): - req = request_context.get_request() - test_template = Autograder.selected_template - - if test_template.requires_pre_executed_tree: - logger.info("Template requires pre-executed criteria tree.") - criteria_tree = CriteriaTree.build_pre_executed_tree(test_template) - criteria_tree.print_pre_executed_tree() - else: - logger.info("Template does not require pre-executed criteria tree.") - criteria_tree = CriteriaTree.build_non_executed_tree() - - test_template.stop() - criteria_tree.print_pre_executed_tree() - logger.info("Criteria tree built successfully") - - req.criteria_tree = criteria_tree - return criteria_tree - - @staticmethod - def _start_and_run_grader(): - req = request_context.get_request() - criteria_tree = req.criteria_tree - test_template = Autograder.selected_template - - - logger.info("Initializing grader with criteria tree and test template") - grader = Grader(criteria_tree, test_template) - logger.debug(f"Grader initialized for student: {req.student_name}") - - - logger.info(f"Running grading process") - - result = grader.run() - - return result - - @staticmethod - def export_final_score(final_score): - req = request_context.get_request() - student_credentials = req.student_credentials - if req.redis_token and req.redis_url: - logger.info("Sending final score to Redis") - driver = Driver.create(req.redis_token, req.redis_url) - if driver is not None: - if driver.user_exists(student_credentials): - driver.set_score(student_credentials, final_score) - else: - driver.create_user(student_credentials) - driver.set_score(student_credentials, final_score) - logger.info("Final score sent to Redis successfully") - - @staticmethod - def _setup_feedback_pref(): - feedback = FeedbackPreferences.from_dict() - Autograder.feedback_preferences = feedback - - @staticmethod - def create_feedback_report(result: Result): - - req = request_context.get_request() - template = Autograder.selected_template - feedback = Autograder.feedback_preferences - feedback_mode = req.feedback_mode - - - if feedback_mode == "default": - logger.info("Creating default reporter") - reporter = Reporter.create_default_reporter(result, feedback,template) - logger.info("Default reporter created successfully") - - elif feedback_mode == "ai": - logger.info("Creating AI reporter") - - if not all( - [req.openai_key,req.redis_url, req.redis_token]): - error_msg = "OpenAI key, Redis URL, and Redis token are required for AI feedback mode." - logger.error(error_msg) - raise ValueError(error_msg) - - logger.info("All AI requirements validated successfully") - - # Setup Redis driver - driver = Driver.create(req.redis_token, req.redis_url) - student_credentials = req.student_credentials - - - if not driver.user_exists(student_credentials): - driver.create_user(student_credentials) - - if driver.decrement_user_quota(student_credentials): - quota = driver.get_user_quota(student_credentials) - logger.info(f"Quota check passed. 
Remaining quota: {quota}") - reporter = Reporter.create_ai_reporter(result,feedback, template, quota) - else: - logger.warning("Quota exceeded for student, falling back to default feedback.") - reporter = Reporter.create_default_reporter(result, feedback,template) - - else: - raise ValueError(f"Unsupported feedback mode: {feedback_mode}") - - req.reporter = reporter - return reporter - - @staticmethod - def _generate_feedback(): - req = request_context.get_request() - reporter = req.reporter - feedback_report = reporter.generate_feedback() - req.feedback_report = feedback_report - return feedback_report - - - -if __name__ == "__main__": - if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - - # 1. Define submission files for web dev - submission_files = { - "index.html": """ - - - - Test Page - - - -
-        <header>
-            <h1>Welcome</h1>
-        </header>
-        <main>
-            <p>This is a paragraph.</p>
-            <img alt="A descriptive alt text">
-        </main>
-        <footer>
- - - - """, - "style.css": """ - body { - font-family: sans-serif; - margin: 20px; - } - header { - background-color: #f0f0f0; - padding: 1em; - } - """ - } - - # 2. Define criteria_json for web dev - criteria_json = { - "test_library": "web_dev", # Match the template name - "base": { - "weight": 100, - "subjects": { - "html_structure": { - "weight": 70, - "tests": [ - { - "file": "index.html", - "name": "has_tag", - "calls": [ - ["head", 1], - ["body", 1], - ["header", 1], - ["main", 1], - ["footer", 1] - ] - }, - { - "file": "index.html", - "name": "check_css_linked" - } - ] - }, - "accessibility": { - "weight": 30, - "tests": [ - { - "file": "index.html", - "name": "check_all_images_have_alt" - } - ] - } - } - }, - "bonus": { - "weight": 20, # Example bonus weight - "subjects": { - "best_practices": { - "weight": 100, - "tests": [ - { - "file": "index.html", - "name": "uses_semantic_tags" - } - ] - } - } - }, - "penalty": { - "weight": 10, # Example penalty weight - "subjects": { - "bad_practices": { - "weight": 100, - "tests": [ - { - "file": "index.html", - "name": "check_no_inline_styles" - } - ] - } - } - } - } - - # 3. Define feedback_json (can be simple or complex) - feedback_json = { - "general": { - "report_title": "Web Dev Assignment Report", - "show_score": True - }, - "default": { - "category_headers": { - "base": "✅ Core HTML/CSS", - "bonus": "⭐ Best Practices Bonus", - "penalty": "🚨 Points Deducted" - } - } - } - - # 4. Define setup_json with file checks - setup_json = { - "file_checks": [ - "index.html", - "style.css" - ], - "commands": [] # No commands needed for static web dev - } - - # 5. Create AssignmentConfig using the web dev template - config = AssignmentConfig( - criteria=criteria_json, - feedback=feedback_json, - setup=setup_json, - template="webdev" # Use the web dev template - ) - - # 6. Create AutograderRequest - request = AutograderRequest( - submission_files=submission_files, - assignment_config=config, - student_name="Local Tester", - student_credentials="local_tester_01", # Credentials for local testing - include_feedback=True, # Request feedback - feedback_mode="default" # Use default feedback for simplicity - ) - - # 7. Run the grading process - logger = logging.getLogger(__name__) - logger.info("--- Running Local Web Dev Test ---") - facade_response = Autograder.grade(request) - - # 8. Print the results - logger.info("--- Grading Complete ---") - print(f"Status: {facade_response.status}") - print(f"Final Score: {facade_response.final_score}") - print("\n--- Feedback ---") - print(facade_response.feedback) - print("\n--- Test Report ---") - if facade_response.test_report: - for test in facade_response.test_report: - print(f"- {test.subject_name}: {test.test_name} -> Score: {test.score}, Report: {test.report}") - else: - print("No test report generated.") diff --git a/autograder/builder/models/criteria_tree.py b/autograder/builder/models/criteria_tree.py deleted file mode 100644 index f983951..0000000 --- a/autograder/builder/models/criteria_tree.py +++ /dev/null @@ -1,223 +0,0 @@ -from typing import List, Any -from autograder.context import request_context -from autograder.core.models.test_result import TestResult - - -# Assuming TestResult is defined in a separate, importable file -# from autograder.core.models.test_result import TestResult - -# =============================================================== -# 1. 
Classes for Test Execution -# =============================================================== -class TestCall: - """Represents a single invocation of a test function with its arguments.""" - def __init__(self, args: List[Any]): - self.args = args - - def __repr__(self): - return f"TestCall(args={self.args})" - -# =============================================================== -# 2. Classes for the Tree Structure -# =============================================================== - -class Test: - """ - Represents a group of calls to a single test function in the library. - This is a LEAF node in the grading tree. - """ - def __init__(self, name: str, filename: str = None): - self.name = name - self.file = filename # The file this test operates on (e.g., "index.html") - self.calls: List[TestCall] = [] - - def add_call(self, call: TestCall): - self.calls.append(call) - - def get_result(self, test_library, submission_files, subject_name: str) -> List[TestResult]: - """ - Retrieves a TestFunction object from the library and executes it for each TestCall. - """ - try: - # Get the TestFunction instance (e.g., HasTag()) from the library - test_function_instance = test_library.get_test(self.name) - except AttributeError as e: - return [TestResult(self.name, 0, f"ERROR: {e}", subject_name)] - - file_content_to_pass = None - if self.file: - # --- File Injection Logic --- - if self.file == "all": - file_content_to_pass = submission_files - else: - file_content_to_pass = submission_files.get(self.file) - if file_content_to_pass is None: - return [TestResult(self.name, 0, f"Erro: O arquivo necessário '{self.file}' não foi encontrado na submissão.", subject_name)] - - # --- Execution Logic --- - if not self.calls: - # Execute with just the file content if no specific calls are defined - if file_content_to_pass: - result = test_function_instance.execute(file_content_to_pass) - else: - result = test_function_instance.execute() - result.subject_name = subject_name - return [result] - - results = [] - for call in self.calls: - # Execute the 'execute' method of the TestFunction instance - if file_content_to_pass: - result = test_function_instance.execute(file_content_to_pass, *call.args) - else: - result = test_function_instance.execute(*call.args) - result.subject_name = subject_name - results.append(result) - return results - - def __repr__(self): - return f"Test(name='{self.name}', file='{self.file}', calls={len(self.calls)})" - -class Subject: - """ - Represents a subject, which can contain EITHER a list of tests OR - a dictionary of nested subjects. This is a BRANCH or LEAF-HOLDER node. - """ - def __init__(self, name, weight=0): - self.name = name - self.weight = weight - self.tests: List[Test] | None = None - self.subjects: dict[str, 'Subject'] | None = None - - def __repr__(self): - if self.subjects is not None: - return f"Subject(name='{self.name}', weight={self.weight}, subjects={len(self.subjects)})" - return f"Subject(name='{self.name}', weight={self.weight}, tests={self.tests})" - - -class TestCategory: - """ - Represents one of the three main categories: base, bonus, or penalty. - Can contain EITHER a list of tests OR a dictionary of subjects (not both). 
- """ - def __init__(self, name, max_score=100): - self.name = name - self.max_score = max_score - self.subjects: dict[str, Subject] | None = None - self.tests: List[Test] | None = None - - def set_weight(self, weight): - self.max_score = weight - - def add_subject(self, subject: Subject): - if self.subjects is None: - self.subjects = {} - self.subjects[subject.name] = subject - - def __repr__(self): - if self.tests is not None: - return f"TestCategory(name='{self.name}', max_score={self.max_score}, tests={len(self.tests)})" - return f"TestCategory(name='{self.name}', max_score={self.max_score}, subjects={list(self.subjects.keys()) if self.subjects else []})" - - -class Criteria: - """The ROOT of the criteria tree.""" - def __init__(self, bonus_weight=0, penalty_weight=0): - self.base = TestCategory("base") - self.bonus = TestCategory("bonus", max_score=bonus_weight) - self.penalty = TestCategory("penalty", max_score=penalty_weight) - - def __repr__(self): - return f"Criteria(categories=['base', 'bonus', 'penalty'])" - - def print_tree(self): - """Prints a visual representation of the entire criteria tree.""" - print(f"🌲 Criteria Tree") - self._print_category(self.base, prefix=" ") - self._print_category(self.bonus, prefix=" ") - self._print_category(self.penalty, prefix=" ") - - def _print_category(self, category: TestCategory, prefix: str): - """Helper method to print a category and its subjects or tests.""" - if not category.subjects and not category.tests: - return - print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})") - - if category.subjects: - for subject in category.subjects.values(): - self._print_subject(subject, prefix=prefix + " ") - - if category.tests: - for test in category.tests: - print(f"{prefix} - 🧪 {test.name} (file: {test.file})") - for call in test.calls: - print(f"{prefix} - Parameters: {call.args}") - - def _print_subject(self, subject: Subject, prefix: str): - """Recursive helper method to print a subject and its contents.""" - print(f"{prefix}📘 {subject.name} (weight: {subject.weight})") - - if subject.subjects is not None: - for sub in subject.subjects.values(): - self._print_subject(sub, prefix=prefix + " ") - - if subject.tests is not None: - for test in subject.tests: - print(f"{prefix} - 🧪 {test.name} (file: {test.file})") - for call in test.calls: - print(f"{prefix} - Parameters: {call.args}") - - def print_pre_executed_tree(self): - """Prints a visual representation of the entire pre-executed criteria tree.""" - print(f"🌲 Pre-Executed Criteria Tree") - self._print_pre_executed_category(self.base, prefix=" ") - self._print_pre_executed_category(self.bonus, prefix=" ") - self._print_pre_executed_category(self.penalty, prefix=" ") - - def _print_pre_executed_category(self, category: TestCategory, prefix: str): - """Helper method to print a category and its pre-executed subjects or tests.""" - if not category.subjects and not category.tests: - return - print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})") - - if category.subjects: - for subject in category.subjects.values(): - self._print_pre_executed_subject(subject, prefix=prefix + " ") - - if category.tests: - # In a pre-executed tree, category.tests contains TestResult objects - for result in category.tests: - if isinstance(result, TestResult): - params_str = f" (Parameters: {result.parameters})" if result.parameters else "" - print(f"{prefix} - 📝 {result.test_name}{params_str} -> Score: {result.score}") - else: - print(f"{prefix} - ? 
Unexpected item in tests list: {result}") - - def _print_pre_executed_subject(self, subject: Subject, prefix: str): - """Recursive helper method to print a subject and its pre-executed test results.""" - print(f"{prefix}📘 {subject.name} (weight: {subject.weight})") - - if subject.subjects is not None: - for sub in subject.subjects.values(): - self._print_pre_executed_subject(sub, prefix=prefix + " ") - - if subject.tests is not None: - # In a pre-executed tree, subject.tests contains TestResult objects - - # In the regular tree, subject.tests contains "Test" objects - for result in subject.tests: - if isinstance(result, TestResult): - params_str = f" (Parameters: {result.parameters})" if result.parameters else "" - print(f"{prefix} - 📝 {result.test_name}{params_str} -> Score: {result.score}") - - elif isinstance(result, Test): - print(f"{prefix} - 🧪 {result.name} (file: {result.file})") - """Added the symbol identificator to match the previous formatting""" - for call in result.calls: - print(f"{prefix} - Parameters: {call.args}") - else: - # Fallback for unexpected types - print(f"{prefix} - ? Unexpected item in tests list: {result}") - - - diff --git a/autograder/builder/pre_flight.py b/autograder/builder/pre_flight.py deleted file mode 100644 index 51bf8e1..0000000 --- a/autograder/builder/pre_flight.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from autograder.context import request_context -class PreFlight: - def __init__(self,required_files=None,setup_commands=None): - self.required_files = required_files if required_files else [] - self.setup_commands = setup_commands if setup_commands else [] - self.fatal_errors = [] - self.logger = logging.getLogger("PreFlight") - - def check_required_files(self): - """ - Checks for the existence of required files in the submission. - """ - request = request_context.get_request() - submission_files = request.submission_files - self.logger.debug("Checking required files") - for file in self.required_files: - if file not in submission_files: - error_msg = f"**Erro:** Arquivo ou diretório obrigatório não encontrado: `'{file}'`" - self.logger.error(error_msg) - self.fatal_errors.append({"type": "file_check", "message": error_msg}) - - @classmethod - def run(cls): - """ - Creates a PreFlight instance and runs the pre-flight checks. 
- """ - request = request_context.get_request() - setup_dict = request.assignment_config.setup - preflight = cls( - required_files=setup_dict.get('file_checks', []), - setup_commands=setup_dict.get('commands', []) - ) - preflight.check_required_files() - # Future: Add command execution logic here if needed - return preflight.fatal_errors - - diff --git a/autograder/builder/template_library/library.py b/autograder/builder/template_library/library.py deleted file mode 100644 index ff9103c..0000000 --- a/autograder/builder/template_library/library.py +++ /dev/null @@ -1,111 +0,0 @@ -import importlib.util -import inspect -import tempfile -import os -from autograder.builder.models.template import Template - - -class TemplateLibrary: - @staticmethod - def get_template(template_name: str, custom_template_content: str = None, clean=False): - if template_name == "custom": - if not custom_template_content: - raise ValueError("Custom template content must be provided for 'custom' template type.") - return TemplateLibrary._load_custom_template_from_content(custom_template_content) - - if template_name == "webdev": - from autograder.builder.template_library.templates.web_dev import WebDevTemplate - return WebDevTemplate(clean) - if template_name == "api": - from autograder.builder.template_library.templates.api_testing import ApiTestingTemplate - return ApiTestingTemplate(clean) - if template_name == "essay": - from autograder.builder.template_library.templates.essay_grader import EssayGraderTemplate - return EssayGraderTemplate(clean) - if template_name == "io": - from autograder.builder.template_library.templates.input_output import InputOutputTemplate - return InputOutputTemplate(clean) - else: - raise ValueError(f"Template '{template_name}' not found.") - - @staticmethod - def _load_custom_template_from_content(template_content: str): - """Load a custom template directly from string content without file placement.""" - spec = importlib.util.spec_from_loader("custom_template", loader=None) - custom_module = importlib.util.module_from_spec(spec) - - # Execute the template code directly in the module namespace - exec(template_content, custom_module.__dict__) - - # Find and return the Template subclass - for name, obj in inspect.getmembers(custom_module): - if inspect.isclass(obj) and issubclass(obj, Template) and obj is not Template: - return obj() - - raise ImportError("No class inheriting from 'Template' found in the custom template content.") - - @staticmethod - def _load_custom_template(file_path: str): - """Legacy method for file-based custom templates.""" - spec = importlib.util.spec_from_file_location("custom_template", file_path) - custom_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(custom_module) - - for name, obj in inspect.getmembers(custom_module): - if inspect.isclass(obj) and issubclass(obj, Template) and obj is not Template: - return obj() - - raise ImportError(f"No class inheriting from 'Template' found in {file_path}") - - - @staticmethod - def get_template_info(template_name: str)-> dict: - """Gets all the details of a template. - param template_name: The name of the template to retrieve. - return: A dictionary with all the template details. 
- example: - { - "name": "I/O", - "description": "Template for testing input/output functions.", - "tests": [ - { - "name": "test_function_1", - "description": "Tests function 1 with various inputs.", - "parameters": [ - { - "name": "input1", - "description": "Description of input1", - "type": "string" - } - ] - }, - ... - """ - #1. Retrieve an instance of the template from the library - template = TemplateLibrary.get_template(template_name, clean=True) - if not template: - raise ValueError(f"Template '{template_name}' not found.") - - #2. Prepare the main dictionary with basic template info - template_data = { - "template_name": template.template_name, - "template_description": template.template_description, - "tests": [] - } - - for test in template.get_tests().values(): - test_data = { - "name": test.name, - "description": test.description, - "required_file": test.required_file, - "parameters": [] - } - for param in test.parameter_description: - test_data["parameters"].append({ - "name": param.name, - "description": param.description, - "type": param.type - }) - template_data["tests"].append(test_data) - - return template_data diff --git a/autograder/builder/tree_builder.py b/autograder/builder/tree_builder.py deleted file mode 100644 index 373159f..0000000 --- a/autograder/builder/tree_builder.py +++ /dev/null @@ -1,271 +0,0 @@ -from typing import List, Dict, Any - -from autograder.builder.models.criteria_tree import Criteria, Subject, Test, TestCall, TestResult -from autograder.builder.models.template import Template -from autograder.context import request_context - -class CriteriaTree: - """A factory for creating a Criteria object from a configuration dictionary.""" - @staticmethod - def build_pre_executed_tree(template: Template) -> Criteria: - """ Builds a Criteria tree and pre-executes all tests, having leaves as TestResult objects.""" - - request = request_context.get_request() - config_dict = request.assignment_config.criteria - submission_files = request.submission_files - criteria = Criteria() - - for category_name in ["base", "bonus", "penalty"]: - if category_name in config_dict: - category = getattr(criteria, category_name) - category_data = config_dict[category_name] - - if "weight" in category_data: - category.max_score = category_data.get("weight", 100) - - # Validate that category doesn't have both subjects and tests - if "subjects" in category_data and "tests" in category_data: - raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.") - - if "subjects" in category_data: - subjects = [ - CriteriaTree._parse_and_execute_subject(s_name, s_data, template, submission_files) - for s_name, s_data in category_data["subjects"].items() - ] - CriteriaTree._balance_subject_weights(subjects) - for subject in subjects: - category.add_subject(subject) - elif "tests" in category_data: - # Handle tests directly at category level - parsed_tests = CriteriaTree._parse_tests(category_data["tests"]) - executed_tests = [] - for test in parsed_tests: - test_results = test.get_result(template, submission_files, category_name) - executed_tests.extend(test_results) - category.tests = executed_tests - return criteria - - @staticmethod - def build_non_executed_tree() -> Criteria: - """Builds the entire criteria tree, including balancing subject weights.""" - criteria = Criteria() - request = request_context.get_request() - config_dict = request.assignment_config.criteria - for category_name in ["base", "bonus", "penalty"]: - if category_name in 
config_dict: - category = getattr(criteria, category_name) - category_data = config_dict[category_name] - - # Set max_score for bonus and penalty categories - if "weight" in category_data: - category.max_score = category_data.get("weight", 100) - - # Validate that category doesn't have both subjects and tests - if "subjects" in category_data and "tests" in category_data: - raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.") - - if "subjects" in category_data: - subjects = [ - CriteriaTree._parse_subject(s_name, s_data) - for s_name, s_data in category_data["subjects"].items() - ] - CriteriaTree._balance_subject_weights(subjects) - for subject in subjects: - category.add_subject(subject) - elif "tests" in category_data: - # Handle tests directly at category level - category.tests = CriteriaTree._parse_tests(category_data["tests"]) - return criteria - - @staticmethod - def _balance_subject_weights(subjects: List[Subject]): - """Balances the weights of a list of sibling subjects to sum to 100.""" - total_weight = sum(s.weight for s in subjects) - if total_weight > 0 and total_weight != 100: - scaling_factor = 100 / total_weight - for subject in subjects: - subject.weight *= scaling_factor - - @staticmethod - def _parse_subject(subject_name: str, subject_data: dict) -> Subject: - """Recursively parses a subject and balances the weights of its children.""" - if "tests" in subject_data and "subjects" in subject_data: - raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.") - - subject = Subject(subject_name, subject_data.get("weight", 0)) - if "tests" in subject_data: - subject.tests = CriteriaTree._parse_tests(subject_data["tests"]) - elif "subjects" in subject_data: - child_subjects = [ - CriteriaTree._parse_subject(sub_name, sub_data) - for sub_name, sub_data in subject_data["subjects"].items() - ] - CriteriaTree._balance_subject_weights(child_subjects) - subject.subjects = {s.name: s for s in child_subjects} - else: - subject.tests = [] - return subject - - @staticmethod - def _parse_and_execute_subject(subject_name: str, subject_data: dict, template: Template, submission_files: dict) -> Subject: - """Recursively parses a subject, executes its tests, and balances the weights of its children.""" - if "tests" in subject_data and "subjects" in subject_data: - raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.") - - subject = Subject(subject_name, subject_data.get("weight", 0)) - - if "tests" in subject_data: - parsed_tests = CriteriaTree._parse_tests(subject_data["tests"]) - executed_tests = [] - for test in parsed_tests: - # The run method executes the test and returns a list of TestResult objects - test_results = test.get_result(template, submission_files, subject_name) - executed_tests.extend(test_results) - subject.tests = executed_tests # Store TestResult objects instead of Test objects - elif "subjects" in subject_data: - child_subjects = [ - CriteriaTree._parse_and_execute_subject(sub_name, sub_data, template, submission_files) - for sub_name, sub_data in subject_data["subjects"].items() - ] - CriteriaTree._balance_subject_weights(child_subjects) - subject.subjects = {s.name: s for s in child_subjects} - else: - subject.tests = [] - return subject - - @staticmethod - def _parse_tests(test_data: list) -> List[Test]: - """Parses a list of test definitions from the configuration.""" - parsed_tests = [] - for test_item in test_data: - if 
isinstance(test_item, str): - # Handle simple test names (e.g., "check_no_unclosed_tags") - test = Test(name=test_item) # Default file - test.add_call(TestCall(args=[])) - parsed_tests.append(test) - - elif isinstance(test_item, dict): - # Handle complex test definitions - test_name = test_item.get("name") - test_file = test_item.get("file") - if not test_name: - raise ValueError(f"Test definition is missing 'name': {test_item}") - - test = Test(name=test_name, filename=test_file) - - if "calls" in test_item: - for call_args in test_item["calls"]: - test.add_call(TestCall(args=call_args)) - else: - # If no 'calls' are specified, it's a single call with no arguments - test.add_call(TestCall(args=[])) - - parsed_tests.append(test) - - return parsed_tests - - - -if __name__ == "__main__": - criteria_json = { - "test_library": "essay ai grader", - "base": { - "weight": 100, - "subjects": { - "foundations": { - "weight": 60, - "tests": [ - { - "file": "essay.txt", - "name": "thesis_statement" - }, - { - "file": "essay.txt", - "name": "clarity_and_cohesion" - }, - { - "file": "essay.txt", - "name": "grammar_and_spelling" - } - ] - }, - "prompt_adherence": { - "weight": 40, - "tests": [ - { - "file": "essay.txt", - "name": "adherence_to_prompt", - "calls": [ - [ "Analyze the primary causes of the Industrial Revolution and its impact on 19th-century society." ] - ] - } - ] - } - } - }, - "bonus": { - "weight": 30, - "subjects": { - "rhetorical_skill": { - "weight": 70, - "tests": [ - { - "file": "essay.txt", - "name": "counterargument_handling" - }, - { - "file": "essay.txt", - "name": "vocabulary_and_diction" - }, - { - "file": "essay.txt", - "name": "sentence_structure_variety" - } - ] - }, - "deeper_analysis": { - "weight": 30, - "tests": [ - { - "file": "essay.txt", - "name": "topic_connection", - "calls": [ - [ "technological innovation", "social inequality" ] - ] - } - ] - } - } - }, - "penalty": { - "weight": 25, - "subjects": { - "logical_integrity": { - "weight": 100, - "tests": [ - { - "file": "essay.txt", - "name": "logical_fallacy_check" - }, - { - "file": "essay.txt", - "name": "bias_detection" - }, - { - "file": "essay.txt", - "name": "originality_and_plagiarism" - } - ] - } - } - } -} - submission_files = {"essay.txt": """Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike. -The primary benefit of AI in the workplace is its capacity to augment human potential and drive efficiency. AI-powered systems can analyze vast datasets in seconds, automating routine cognitive and manual tasks, which frees human workers to focus on more complex, creative, and strategic endeavors. For instance, in medicine, AI algorithms assist radiologists in detecting tumors with greater accuracy, while in finance, they identify fraudulent transactions far more effectively than any human team. This collaboration between human and machine not only boosts output but also creates new roles centered around AI development, ethics, and system maintenance—jobs that did not exist a decade ago. 
-However, this technological advancement casts a significant shadow of disruption. The same automation that drives efficiency also leads to job displacement, particularly for roles characterized by repetitive tasks. Assembly line workers, data entry clerks, and even some paralegal roles face a high risk of obsolescence. This creates a widening skills gap, where demand for high-level technical skills soars while demand for traditional skills plummets. Without robust mechanisms for reskilling and upskilling the existing workforce, this gap threatens to exacerbate socio-economic inequality, creating a divide between those who can command AI and those who are displaced by it. There are many gramatical errors in this sentence, for testing purposes. -The most critical challenge, therefore, is not to halt technological progress but to manage its societal impact. A multi-pronged strategy is essential. Governments and educational institutions must collaborate to reform curricula, emphasizing critical thinking, digital literacy, and lifelong learning. Furthermore, corporations have a responsibility to invest in their employees through continuous training programs. Finally, strengthening social safety nets, perhaps through concepts like Universal Basic Income (UBI) or enhanced unemployment benefits, may be necessary to support individuals as they navigate this volatile transition period. -In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""} - #tree = CriteriaTree.build_pre_executed_tree(criteria_json, WebDevLibrary(), submission_files) - tree = CriteriaTree.build_non_executed_tree(criteria_json) - #tree.print_pre_executed_tree() - tree.print_tree() \ No newline at end of file diff --git a/autograder/context.py b/autograder/context.py deleted file mode 100644 index 6838e12..0000000 --- a/autograder/context.py +++ /dev/null @@ -1,32 +0,0 @@ -from connectors.models.autograder_request import AutograderRequest - - -class RequestContext: - """ - A Singleton class to hold the active AutograderRequest object. - This provides a global point of access to request data, avoiding the need - to pass the request object through multiple layers of the application. - """ - _instance = None - - @classmethod - def get_instance(cls): - """Gets the single instance of the class.""" - if cls._instance is None: - cls._instance = cls.__new__(cls) - cls._instance.request = None - return cls._instance - - def set_request(self, autograder_request: AutograderRequest | None): - """Sets the active autograder request for the current session.""" - self.request = autograder_request - return self.request - - def get_request(self): - """Gets the active autograder request.""" - if self.request is None: - raise Exception("RequestContext has not been initialized. 
Call set_request() first.") - return self.request - -# Create a globally accessible instance -request_context = RequestContext.get_instance() diff --git a/autograder/core/grading/grader.py b/autograder/core/grading/grader.py deleted file mode 100644 index bde6b74..0000000 --- a/autograder/core/grading/grader.py +++ /dev/null @@ -1,209 +0,0 @@ -from typing import List, Dict, Optional - -from autograder.context import request_context -from autograder.builder.tree_builder import * -from autograder.core.models.result import Result -from autograder.core.models.test_result import TestResult - - -class Grader: - """ - Traverses a Criteria tree, executes tests, and calculates a weighted score. - Only includes scores from categories (base, bonus, penalty) that contain tests. - """ - - def __init__(self, criteria_tree: 'Criteria', test_library: object): - self.criteria = criteria_tree - self.test_library = test_library - self.base_results: List['TestResult'] = [] - self.bonus_results: List['TestResult'] = [] - self.penalty_results: List['TestResult'] = [] - - def run(self) -> 'Result': - request = request_context.get_request() - submission_files = request.submission_files - author_name = request.student_name - final_score = self._run(submission_files) - return Result( - final_score=final_score, - author=author_name, - submission_files=submission_files, - base_results=self.base_results, - bonus_results=self.bonus_results, - penalty_results=self.penalty_results - ) - - - def _run(self, submission_files: Dict) -> float: - """ - Runs the entire grading process and returns the final calculated score. - """ - print("\n--- STARTING GRADING PROCESS ---") - # Step 1: Grade categories. The methods will return None if no tests exist. - ## CHANGED: Coalesce None to 0.0 to signify that an empty category contributes nothing to the score. - base_score = self._grade_subject_or_category(self.criteria.base, submission_files, self.base_results) or 0.0 - bonus_score = self._grade_subject_or_category(self.criteria.bonus, submission_files, self.bonus_results) or 0.0 - penalty_points = self._calculate_penalty_points(self.criteria.penalty, submission_files, - self.penalty_results) or 0.0 - - # Step 3: Apply the final scoring logic - final_score = self._calculate_final_score(base_score, bonus_score, penalty_points) - - print("\n--- GRADING COMPLETE ---") - print(f"Aggregated Base Score: {base_score:.2f}") - print(f"Aggregated Bonus Score: {bonus_score:.2f}") - print(f"Total Penalty Points to Subtract: {penalty_points:.2f}") - print("-" * 25) - print(f"Final Calculated Score: {final_score:.2f}") - print("-" * 25) - - return final_score - - def _grade_subject_or_category(self, current_node: 'Subject' or 'TestCategory', submission_files: Dict, - results_list: List['TestResult'], depth=0) -> Optional[float]: - """ - Recursively grades a subject or category, returning a weighted score or None if no tests are found. 
- """ - prefix = " " * depth - - # Base case: Node is a leaf with tests - if hasattr(current_node, 'tests') and current_node.tests: - print(f"\n{prefix}📘 Grading {current_node.name}...") - subject_test_results = [] - for test in current_node.tests: - test_results = test.get_result(self.test_library, submission_files, current_node.name) - subject_test_results.extend(test_results) - - if not subject_test_results: - return None # No tests were actually run - - results_list.extend(subject_test_results) - scores = [res.score for res in subject_test_results] - average_score = sum(scores) / len(scores) - print(f"{prefix} -> Average score: {average_score:.2f}") - return average_score - - # Recursive case: Node is a branch (category or subject with sub-subjects) - child_subjects_classes = getattr(current_node, 'subjects', {}) - if not child_subjects_classes: - return None # No tests and no children means this branch is empty - child_subjects = child_subjects_classes.values() - if not child_subjects: - return None - print(f"\n{prefix}📘 Grading {current_node.name}...") - - child_scores_map = {sub.name: self._grade_subject_or_category(sub, submission_files, results_list, depth + 1) - for sub in child_subjects} - - # Filter out children that had no tests (returned None) - valid_children = [sub for sub in child_subjects if child_scores_map[sub.name] is not None] - - if not valid_children: - return None # No children in this branch contained any tests - - total_weight = sum(sub.weight for sub in valid_children) - - # If weights are 0, calculate a simple average of the valid children - if total_weight == 0: - scores = [child_scores_map[sub.name] for sub in valid_children] - return sum(scores) / len(scores) - - # Otherwise, calculate the weighted score based only on valid children - weighted_score = 0 - for sub in valid_children: - child_score = child_scores_map[sub.name] - weighted_score += child_score * (sub.weight / total_weight) - - print(f"\n{prefix} -> Weighted score for '{current_node.name}': {weighted_score:.2f}") - return weighted_score - - def _calculate_penalty_points(self, penalty_category: 'TestCategory', submission_files: Dict, - results_list: List['TestResult']) -> Optional[float]: - """ - Calculates the total penalty points. Returns None if no penalty tests exist. - """ - print(f"\n Penalizing {penalty_category.name}...") - - # This is a simplified entry point; the main logic is in _calculate_subject_penalty - # We treat the main penalty category like a subject to start the recursion. - return self._calculate_subject_penalty(penalty_category, submission_files, results_list, depth=0) - - def _calculate_subject_penalty(self, subject: 'Subject', submission_files: Dict, results_list: List['TestResult'], - depth=0) -> Optional[float]: - """ - Helper to calculate penalty for a single subject or category. - Returns penalty points (0-100) or None if no tests are found. 
- """ - prefix = " " * depth - - # Base Case: This node is a leaf with tests - if hasattr(subject, 'tests') and subject.tests: - test_penalties = [] - for test in subject.tests: - test_results = test.get_result(self.test_library, submission_files, subject.name) - if not test_results: - continue - results_list.extend(test_results) - # Penalty incurred = 100 - score - penalty_incurred = (100 - sum(res.score for res in test_results) / len(test_results)) - test_penalties.append(penalty_incurred) - - if not test_penalties: - return None # No tests were actually run - - avg_penalty_for_subject = sum(test_penalties) / len(test_penalties) - print(f"{prefix} -> Average penalty for '{subject.name}': {avg_penalty_for_subject:.2f}") - return avg_penalty_for_subject - - # Recursive Case: This node is a branch with children - child_subjects_classes = getattr(subject, 'subjects', {}) - if not child_subjects_classes: - return None # No tests and no children - child_subjects = child_subjects_classes.values() - child_penalties_map = {sub.name: self._calculate_subject_penalty(sub, submission_files, results_list, depth + 1) - for sub in child_subjects} - - valid_children = [sub for sub in child_subjects if child_penalties_map[sub.name] is not None] - - if not valid_children: - return None # No children had penalty tests - - total_weight = sum(sub.weight for sub in valid_children) - if total_weight == 0: - penalties = [child_penalties_map[sub.name] for sub in valid_children] - return sum(penalties) / len(penalties) # Average of valid penalties - - weighted_penalty = 0 - for sub in valid_children: - child_penalty = child_penalties_map[sub.name] - weighted_penalty += child_penalty * (sub.weight / total_weight) - - print(f"\n{prefix} -> Weighted penalty for '{subject.name}': {weighted_penalty:.2f}") - return weighted_penalty - - def _calculate_final_score(self, base_score: float, bonus_score: float, penalty_points: float) -> float: - """ - Applies the final scoring logic with the corrected penalty calculation. 
- """ - bonus_weight = self.criteria.bonus.max_score - penalty_weight = self.criteria.penalty.max_score - - final_score = base_score - - if final_score < 100: - bonus_points_earned = (bonus_score / 100) * bonus_weight - final_score += bonus_points_earned - - final_score = min(100.0, final_score) - - # The penalty_points now represents the percentage of the total penalty to apply - penalty_points_to_subtract = (penalty_points / 100) * penalty_weight - final_score -= penalty_points_to_subtract - - print(f"\nApplying Final Calculations:") - print(f" Base Score: {base_score:.2f}") - print(f" Bonus Points Added: {(bonus_score / 100) * bonus_weight:.2f}") - print(f" Score Before Penalty: {min(100.0, final_score + penalty_points_to_subtract):.2f}") - print(f" Penalty Points Subtracted: {penalty_points_to_subtract:.2f}") - - return max(0.0, final_score) \ No newline at end of file diff --git a/autograder/core/models/feedback_preferences.py b/autograder/core/models/feedback_preferences.py deleted file mode 100644 index 95d23c3..0000000 --- a/autograder/core/models/feedback_preferences.py +++ /dev/null @@ -1,166 +0,0 @@ -from typing import List, Dict, Any, Optional -from pydantic import BaseModel, Field -from autograder.context import request_context - - -class LearningResource(BaseModel): - """Represents a single online resource linked to specific test names.""" - url: str - description: str - linked_tests: List[str] - - def __repr__(self) -> str: - return f"LearningResource(url='{self.url}', tests={self.linked_tests})" - - -class GeneralPreferences(BaseModel): - """Preferences applicable to both Default and AI reporters.""" - report_title: str = "Relatório de Avaliação" - show_score: bool = True - show_passed_tests: bool = False - add_report_summary: bool = True - online_content: List[LearningResource] = Field(default_factory=list) - - -class AiReporterPreferences(BaseModel): - """Preferences specific to the AI Reporter.""" - provide_solutions: str = "hint" - feedback_tone: str = "encouraging but direct" - feedback_persona: str = "Code Buddy" - assignment_context: str = "" - extra_orientations: str = "" - submission_files_to_read: List[str] = Field(default_factory=list) - - -class DefaultReporterPreferences(BaseModel): - """Preferences specific to the Default (template-based) Reporter.""" - category_headers: Dict[str, str] = Field( - default_factory=lambda: { - "base": "✅ Requisitos Essenciais", - "bonus": "⭐ Pontos Extras", - "penalty": "❌ Pontos a Melhorar" - } - ) - - -class FeedbackPreferences(BaseModel): - """ - A unified model to store all feedback preferences, including the new - test-linked learning resources and legacy AI configurations. - """ - general: GeneralPreferences = Field(default_factory=GeneralPreferences) - ai: AiReporterPreferences = Field(default_factory=AiReporterPreferences) - default: DefaultReporterPreferences = Field(default_factory=DefaultReporterPreferences) - - @classmethod - def from_dict(cls) -> 'FeedbackPreferences': - """ - Creates a FeedbackPreferences object from a dictionary, with defaults. 
- """ - request = request_context.get_request() - config_dict = request.assignment_config.feedback - - # --- Parse General Preferences, including the new online_content --- - general_prefs_data = config_dict.get('general', {}).copy() - online_content_data = general_prefs_data.pop('online_content', []) - - # Create LearningResource objects - online_resources = [LearningResource(**res) for res in online_content_data] - general_prefs_data['online_content'] = online_resources - - general = GeneralPreferences(**general_prefs_data) - - # --- Parse AI and Default Preferences --- - ai_prefs_data = config_dict.get('ai', {}) - default_prefs_data = config_dict.get('default', {}) - - ai = AiReporterPreferences(**ai_prefs_data) - default = DefaultReporterPreferences(**default_prefs_data) - - return cls(general=general, ai=ai, default=default) - - -if __name__ == '__main__': - feedback_config = { - "general": { - "report_title": "Relatório Final - Desafio Web", - "add_report_summary": True, - "online_content": [ - { - "url": "https://developer.mozilla.org/pt-BR/docs/Web/HTML/Element/img", - "description": "Guia completo sobre a tag .", - "linked_tests": ["check_all_images_have_alt"] - } - ] - }, - "ai": { - "assignment_context": "Este é um desafio focado em HTML semântico e CSS responsivo.", - "feedback_persona": "Professor Sênior" - }, - "default": { - "category_headers": { - "base": "✔️ Requisitos Obrigatórios", - "penalty": "🚨 Pontos de Atenção" - } - } - } - - # =============================================================== - # 2. CREATE THE PREFERENCES OBJECT FROM THE DICTIONARY - # =============================================================== - # The .from_dict() method will parse the dictionary and fill in any missing - # values with the defaults defined in the class. - try: - # Note: For standalone testing, you'd need to mock request_context - # For now, creating directly for demonstration - preferences = FeedbackPreferences( - general=GeneralPreferences( - report_title="Relatório Final - Desafio Web", - add_report_summary=True, - online_content=[ - LearningResource( - url="https://developer.mozilla.org/pt-BR/docs/Web/HTML/Element/img", - description="Guia completo sobre a tag .", - linked_tests=["check_all_images_have_alt"] - ) - ] - ), - ai=AiReporterPreferences( - assignment_context="Este é um desafio focado em HTML semântico e CSS responsivo.", - feedback_persona="Professor Sênior" - ), - default=DefaultReporterPreferences( - category_headers={ - "base": "✔️ Requisitos Obrigatórios", - "penalty": "🚨 Pontos de Atenção" - } - ) - ) - - # =============================================================== - # 3. 
VERIFY THE PARSED VALUES - # =============================================================== - print("--- FeedbackPreferences object created successfully ---\n") - - # --- Verify General Preferences --- - print("✅ General Preferences:") - print(f" - Report Title: '{preferences.general.report_title}' (Loaded from config)") - print(f" - Show Score: {preferences.general.show_score} (Using default value)") - print(f" - Online Content Items: {len(preferences.general.online_content)} (Loaded from config)") - print(f" - First item URL: {preferences.general.online_content[0].url}") - print(f" - Linked to tests: {preferences.general.online_content[0].linked_tests}") - - # --- Verify AI Preferences --- - print("\n🤖 AI Reporter Preferences:") - print(f" - Feedback Persona: '{preferences.ai.feedback_persona}' (Loaded from config)") - print(f" - Feedback Tone: '{preferences.ai.feedback_tone}' (Using default value)") - print(f" - Assignment Context: '{preferences.ai.assignment_context}' (Loaded from config)") - - # --- Verify Default Reporter Preferences --- - print("\n📝 Default Reporter Preferences:") - print(f" - Base Header: '{preferences.default.category_headers['base']}' (Loaded from config)") - # 'bonus' was not in the config, so it should use the default from the class - print(f" - Bonus Header: '{preferences.default.category_headers['bonus']}' (Using default value)") - - except Exception as e: - print(f"An error occurred: {e}") \ No newline at end of file diff --git a/autograder/core/report/ai_reporter.py b/autograder/core/report/ai_reporter.py deleted file mode 100644 index 374afda..0000000 --- a/autograder/core/report/ai_reporter.py +++ /dev/null @@ -1,208 +0,0 @@ -from openai import OpenAI - -from autograder.builder.models.template import Template -from autograder.core.models.feedback_preferences import FeedbackPreferences -from autograder.core.report.base_reporter import BaseReporter -from autograder.core.utils.secrets_fetcher import get_secret - - -# Supondo que estas classes estão em seus respectivos arquivos e são importáveis -# from .base_reporter import BaseReporter -# from autograder.core.models.feedback_preferences import FeedbackPreferences -# from autograder.core.models.result import Result - -class AIReporter(BaseReporter): - """ - Gera um feedback sofisticado e humanizado, enviando um prompt detalhado - para um modelo de IA. - """ - - def __init__(self, result: 'Result', feedback: 'FeedbackPreferences', test_library: 'Template', quota: int): - super().__init__(result, feedback,test_library) - openai_key = get_secret("OPENAI_API_KEY", "AUTOGRADER_OPENAI_KEY", "us-east-1") - if not openai_key: - raise ValueError("A chave da API da OpenAI é necessária para o AiReporter.") - self.client = OpenAI(api_key=openai_key) - self.quota = quota - self.test_library = test_library - - def generate_feedback(self) -> str: - """ - Constrói um prompt detalhado e chama o modelo de IA para gerar o feedback. - """ - final_prompt = self._build_prompt() - - try: - response = self.client.chat.completions.create( - model="gpt-4", # Ou outro modelo de sua escolha - messages=[ - {"role": "system", "content": self.feedback.ai.feedback_persona}, - {"role": "user", "content": final_prompt} - ], - temperature=0.6) - ai_generated_text = response.choices[0].message.content - - - except Exception as e: - ai_generated_text = f"**Ocorreu um erro ao gerar o feedback da IA:** {e}\n\nRetornando para o feedback padrão." 
- - # --- Formata o relatório final --- - report_parts = [ - f"# {self.feedback.general.report_title}", - f"Este é um feedback gerado por IA e pode conter erros. Você tem {self.quota} créditos restantes.", - f"\nOlá, **{self.result.author}**! Aqui está um feedback detalhado sobre sua atividade.", - f"> **Nota Final:** **`{self.result.final_score:.2f} / 100`**", - "---", - ai_generated_text # O conteúdo principal vem da IA - ] - - if self.feedback.general.add_report_summary: - summary = self._build_summary() - if summary: - report_parts.append(summary) - - report_parts.append("\n\n---\n" + "> Caso queira tirar uma dúvida específica, entre em contato com o Chapter.") - - return "\n".join(filter(None, report_parts)) - - def _format_parameters(self, params: dict) -> str: - """Helper function to format parameters into a readable code string.""" - if not params: - return "" - parts = [f"`{k}`: `{v}`" if isinstance(v, str) else f"`{k}`: `{v}`" for k, v in params.items()] - return f" (Parâmetros: {', '.join(parts)})" - - def _build_prompt(self) -> str: - """Monta todas as informações necessárias em um único e grande prompt para a IA.""" - - prompt_parts = [ - f"**Persona da IA:**\n{self.feedback.ai.feedback_persona}", - f"**Contexto da Atividade:**\n{self.feedback.ai.assignment_context}", - f"**Orientações Adicionais:**\n{self.feedback.ai.extra_orientations}", - f"**Tom do Feedback:**\n{self.feedback.ai.feedback_tone}", - f"**Nível de Ajuda com Soluções:**\n{self.feedback.ai.provide_solutions}", - "---", - self._get_submission_files_as_text(), - "---", - self._format_test_results_for_prompt(), - "---", - self._format_learning_resources_for_prompt(), - "---", - "**Sua Tarefa:**\nCom base em todo o contexto, código e resultados dos testes fornecidos, escreva um feedback em markdown que seja útil e educativo, seguindo todas as orientações." - ] - return "\n\n".join(filter(None, prompt_parts)) - - def _get_submission_files_as_text(self) -> str: - """Lê o conteúdo dos arquivos do aluno especificados nas preferências.""" - files_to_read = self.feedback.ai.submission_files_to_read - if not files_to_read: - return "**Código do Aluno:**\nNenhum arquivo foi especificado para leitura." 
- - file_contents = ["**Código do Aluno:**"] - for filename in files_to_read: - content = self.result.submission_files.get(filename, f"Arquivo '{filename}' não encontrado.") - file_contents.append(f"\n---\n`{filename}`\n---\n```\n{content}\n```") - - return "\n".join(file_contents) - - def _format_test_results_for_prompt(self) -> str: - """Formata os resultados dos testes em uma string para a IA analisar.""" - results_parts = ["**Resultados dos Testes para Análise:**"] - - failed_base = [res for res in self.result.base_results if res.score < 100] - passed_bonus = [res for res in self.result.bonus_results if res.score >= 100] - failed_penalty = [res for res in self.result.penalty_results if res.score < 100] - - if failed_base: - results_parts.append("\n**Testes Obrigatórios que Falharam (Erros Críticos):**") - for res in failed_base: - results_parts.append( - f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`, Mensagem: {res.report}") - - if passed_bonus and self.feedback.general.show_passed_tests: - results_parts.append("\n**Testes Bônus Concluídos com Sucesso (Elogiar):**") - for res in passed_bonus: - results_parts.append(f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`") - - if failed_penalty: - results_parts.append("\n**Penalidades Aplicadas (Más Práticas Detectadas):**") - for res in failed_penalty: - results_parts.append( - f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`, Mensagem: {res.report}") - - return "\n".join(results_parts) - - def _format_learning_resources_for_prompt(self) -> str: - """Formata o conteúdo online para que a IA saiba qual link sugerir para cada erro.""" - if not self.feedback.general.online_content: - return "" - - resource_parts = [ - "**Recursos de Aprendizagem Disponíveis:**\nSe um teste que falhou estiver listado abaixo, sugira o link correspondente."] - - for resource in self.feedback.general.online_content: - tests = ", ".join(f"`{t}`" for t in resource.linked_tests) - resource_parts.append( - f"- Se os testes {tests} falharem, recomende este link: [{resource.description}]({resource.url})") - - return "\n".join(resource_parts) - - def _build_summary(self) -> str: - """Constructs the final summary section of the report using a markdown table.""" - summary_parts = ["\n---\n\n### 📝 Resumo dos Pontos de Atenção"] - failed_base = [res for res in self.result.base_results if res.score < 100] - failed_penalty = [res for res in self.result.penalty_results if res.score < 100] - - if not failed_base and not failed_penalty: - return "" # No need for a summary if everything is okay - - summary_parts.append("| Ação | Tópico | Detalhes do Teste |") - summary_parts.append("|:---|:---|:---|") - - all_failed = failed_base + failed_penalty - for res in all_failed: - try: - # Get the test function from the library to access its description - print("Looking for mother function of test:", res.test_name) - print(self.test_library) - print("Available tests in library:", self.test_library.template_name) - test_func = self.test_library.get_test(res.test_name) - print("Testing function:", test_func.name) - description = test_func.description - except AttributeError: - description = "Descrição não disponível." - - params_str = self._format_parameters(res.parameters).replace(" (Parâmetros: ", "").replace(")", "") - - # Determine the action type - action = "Revisar" - if res in failed_penalty: - action = "Corrigir (Penalidade)" - - # Build the detailed cell content - details_cell = ( - f"**Teste:** `{res.test_name}`
" - f"**O que ele faz:** *{description}*
" - f"**Parâmetros:** {params_str or 'N/A'}" - ) - - summary_parts.append(f"| {action} | `{res.subject_name}` | {details_cell} |") - - return "\n".join(summary_parts) - - def _get_mock_ai_response(self) -> str: - """Uma resposta mockada para fins de teste, já que não estamos fazendo uma chamada de API real.""" - return ( - "### Análise Geral\n" - "Seu projeto está bem estruturado, mas notei alguns pontos de atenção, principalmente relacionados à acessibilidade das imagens e à responsividade.\n\n" - "#### Pontos a Melhorar\n" - "> **Acessibilidade de Imagens**\n" - "> Percebi que uma de suas imagens está sem o atributo `alt`. Este atributo é fundamental para que leitores de tela possam descrever a imagem para usuários com deficiência visual. Analisando seu `index.html`, a segunda tag `` precisa ser corrigida.\n\n" - "> **Responsividade com Media Queries**\n" - "> Seu CSS não inclui `@media` queries. Sem elas, seu layout não conseguirá se adaptar a telas menores, como as de celulares. Recomendo fortemente a leitura do material sobre Media Queries para implementar essa funcionalidade." - ) - - @classmethod - def create(cls, result: 'Result', feedback: 'FeedbackPreferences', quota: int, test_library: 'Template'): - response = cls(result, feedback, quota, test_library) - return response diff --git a/autograder/core/report/base_reporter.py b/autograder/core/report/base_reporter.py deleted file mode 100644 index a013cb0..0000000 --- a/autograder/core/report/base_reporter.py +++ /dev/null @@ -1,49 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Dict - -# Assuming these classes are in their respective, importable files -# from autograder.core.models.feedback_preferences import FeedbackPreferences -# from autograder.core.models.result import Result -# from autograder.builder.tree_builder import TestResult - -class BaseReporter(ABC): - """Abstract base class for reporting test results.""" - def __init__(self, result: 'Result', feedback: 'FeedbackPreferences',template): - self.result = result - self.feedback = feedback - self.template = template - # A map to quickly find learning resources for a given test name - self._content_map = self._build_content_map() - - def _build_content_map(self) -> Dict[str, 'FeedbackPreferences.LearningResource']: - """ - Creates a dictionary for fast lookups of learning resources by test name. - This is a shared utility for any reporter. - """ - content_map = {} - for resource in self.feedback.general.online_content: - for test_name in resource.linked_tests: - content_map[test_name] = resource - return content_map - - def _group_results_by_subject(self, results: List['TestResult']) -> Dict[str, List['TestResult']]: - """ - Groups a flat list of TestResult objects into a dictionary keyed by subject name. - This is a shared utility for any reporter. 
- """ - grouped = {} - for result in results: - if result.subject_name not in grouped: - grouped[result.subject_name] = [] - grouped[result.subject_name].append(result) - return grouped - - @abstractmethod - def generate_feedback(self): - """Generate feedback based on the test results.""" - pass - - @classmethod - def create(cls, result: 'Result', feedback: 'FeedbackPreferences',template): - response = cls(result, feedback,template) - return response \ No newline at end of file diff --git a/autograder/core/report/default_reporter.py b/autograder/core/report/default_reporter.py deleted file mode 100644 index 0ab74b3..0000000 --- a/autograder/core/report/default_reporter.py +++ /dev/null @@ -1,149 +0,0 @@ -from autograder.builder.models.template import Template -from autograder.core.models.feedback_preferences import FeedbackPreferences -from autograder.core.report.base_reporter import BaseReporter - - -class DefaultReporter(BaseReporter): - """ - Generates a structured and visually appealing markdown feedback report - designed to be a clear and helpful learning tool for students. - """ - - def __init__(self, result: 'Result', feedback: 'FeedbackPreferences', test_library: 'Template'): - super().__init__(result, feedback, test_library) - self.test_library = test_library - - def generate_feedback(self) -> str: - """ - Builds the entire markdown report by assembling its various sections. - """ - report_parts = [ - self._build_header(), - self._build_category_section("bonus"), - self._build_category_section("base"), - self._build_category_section("penalty") - ] - - if self.feedback.general.add_report_summary: - summary = self._build_summary() - if summary: # Only add summary if it's not empty - report_parts.append(summary) - - report_parts.append(self._build_footer()) - return "\n".join(filter(None, report_parts)) - - def _format_parameters(self, params: dict) -> str: - """Helper function to format parameters into a readable code string.""" - if not params: - return "" - parts = [f"`{k}`: `{v}`" if isinstance(v, str) else f"`{k}`: `{v}`" for k, v in params.items()] - return f" (Parâmetros: {', '.join(parts)})" - - def _build_header(self) -> str: - """Constructs the top section of the report.""" - header_parts = [f"# {self.feedback.general.report_title}"] - if self.feedback.general.show_score: - header_parts.append(f"> **Nota Final:** **`{self.result.final_score:.2f} / 100`**") - - header_parts.append( - f"\nOlá, **{self.result.author}**! 👋\n\nAqui está o feedback detalhado sobre sua atividade. Use este guia para entender seus acertos e os pontos que podem ser melhorados.") - return "\n".join(header_parts) - - def _build_category_section(self, category_name: str) -> str: - """Builds a report section for a specific category with enhanced formatting and text.""" - category_results = getattr(self.result, f"{category_name}_results", []) - header = self.feedback.default.category_headers.get(category_name, category_name.capitalize()) - section_parts = [f"\n---\n\n## {header}"] - - results_to_show = [] - intro_text = "" - is_bonus = False - - if category_name == "bonus": - is_bonus = True - if self.feedback.general.show_passed_tests: - results_to_show = [res for res in category_results if res.score >= 60] - intro_text = "Parabéns! Você completou os seguintes itens bônus, demonstrando um ótimo conhecimento:" if results_to_show else "Nenhum item bônus foi completado desta vez. Continue se desafiando!" 
- else: # base and penalty - results_to_show = [res for res in category_results if res.score < 60] - if category_name == "base": - intro_text = "Encontramos alguns pontos nos requisitos essenciais que precisam de sua atenção:" if results_to_show else "Excelente! Todos os requisitos essenciais foram atendidos com sucesso." - elif category_name == "penalty": - intro_text = "Foram detectadas algumas práticas que resultaram em penalidades. Veja os detalhes abaixo para entender como corrigi-las:" if results_to_show else "Ótimo trabalho! Nenhuma má prática foi detectada no seu projeto." - - section_parts.append(intro_text) - - if not results_to_show: - return "\n".join(section_parts) - - grouped_results = self._group_results_by_subject(results_to_show) - - for subject, results in grouped_results.items(): - section_parts.append(f"\n#### Tópico: {subject.replace('_', ' ').capitalize()}") - for res in results: - params_str = self._format_parameters(res.parameters) - - if is_bonus: - status_text = "✅ **Passou**" - report_prefix = "Parabéns!" - else: - status_text = "❌ **Falhou**" - report_prefix = "Atenção:" if category_name == "base" else "Cuidado!" - - feedback_item = [ - f"> {status_text} no teste `{res.test_name}`{params_str}", - f"> - **Detalhes:** {report_prefix} {res.report}\n" - ] - - if not is_bonus: - linked_content = self._content_map.get(res.test_name) - if linked_content: - feedback_item.append( - f"> - 📚 **Recurso Sugerido:** [{linked_content.description}]({linked_content.url})\n") - - section_parts.append("\n".join(feedback_item)) - - return "\n".join(section_parts) - - def _build_summary(self) -> str: - """Constructs the final summary section of the report using a markdown table.""" - summary_parts = ["\n---\n\n### 📝 Resumo dos Pontos de Atenção"] - failed_base = [res for res in self.result.base_results if res.score < 100] - failed_penalty = [res for res in self.result.penalty_results if res.score < 100] - - if not failed_base and not failed_penalty: - return "" # No need for a summary if everything is okay - - summary_parts.append("| Ação | Tópico | Detalhes do Teste |") - summary_parts.append("|:---|:---|:---|") - - all_failed = failed_base + failed_penalty - for res in all_failed: - try: - # Get the test function from the library to access its description - test_func = self.test_library.get_test(res.test_name) - description = test_func.description - except AttributeError: - description = "Descrição não disponível." - - params_str = self._format_parameters(res.parameters).replace(" (Parâmetros: ", "").replace(")", "") - - # Determine the action type - action = "Revisar" - if res in failed_penalty: - action = "Corrigir (Penalidade)" - - # Build the detailed cell content - details_cell = ( - f"**Teste:** `{res.test_name}`
" - f"**O que foi verificado:** *{description}*
" - f"**Parâmetros:** {params_str or 'N/A'}" - ) - - summary_parts.append(f"| {action} | `{res.subject_name}` | {details_cell} |") - - return "\n".join(summary_parts) - - def _build_footer(self) -> str: - """Constructs the footer of the report.""" - return "\n---\n" + "> Continue praticando e melhorando seu código. Cada desafio é uma oportunidade de aprender! 🚀" diff --git a/autograder/core/report/fatal_report.py b/autograder/core/report/fatal_report.py deleted file mode 100644 index caddeb4..0000000 --- a/autograder/core/report/fatal_report.py +++ /dev/null @@ -1,116 +0,0 @@ -import os -import json - - - - -class FatalReporter: - """ - This class is responsible for generating a report for fatal errors in the autograder. - It reads a JSON file containing error details and formats it into a - user-friendly markdown report. - """ - # --- Project Directory Setup --- - # These paths are configured to locate necessary files within the project structure. - _THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__)) - _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(_THIS_FILE_DIR))) - VALIDATION_DIR = os.path.join(_PROJECT_ROOT, "autograder",'validation') - REQUEST_BUCKET_DIR = os.path.join(_PROJECT_ROOT, 'request_bucket') - RESULTS_DIR = os.path.join(VALIDATION_DIR, 'tests', 'results') - - @staticmethod - def generate_feedback(report_path=None): - """ - Generates a markdown feedback report based on fatal error results from a JSON file. - - This method reads a JSON file that details fatal errors encountered by the - autograder, formats them into a structured and readable markdown report, - and returns the report as a string. - - Args: - report_path (str, optional): The full path to the JSON report file. - If None, it defaults to a file named - 'fatal_errors.json' in the class's RESULTS_DIR. - - Returns: - str: A string containing the formatted markdown report. - """ - # If no specific path is provided, construct the default path - if report_path is None: - print(FatalReporter.RESULTS_DIR) - report_path = os.path.join(FatalReporter.RESULTS_DIR, 'fatal_report.json') - - # --- Read and Validate Report File --- - try: - with open(report_path, 'r', encoding='utf-8') as f: - data = json.load(f) - except FileNotFoundError: - return "## ❌ Error\nCould not find the fatal error report file. Please contact an administrator." - except json.JSONDecodeError: - return "## ❌ Error\nCould not parse the fatal error report file due to a syntax error. Please contact an administrator." - - errors = data.get("errors", []) - if not errors: - return "## ✅ No Fatal Errors Found\nYour submission passed all initial checks." - - # --- Group Errors for Structured Reporting --- - grouped_errors = {} - for error in errors: - error_type = error.get("type", "unknown_error") - if error_type not in grouped_errors: - grouped_errors[error_type] = [] - grouped_errors[error_type].append(error.get("message", "No message provided.")) - - # --- Build the Markdown Report --- - markdown_report = ["# 🚨 Autograder Fatal Error Report\n"] - markdown_report.append( - "We're sorry, but the autograder could not run due to the following critical issues with your submission. Please fix them and resubmit.\n") - - # Handle specific, common error types with custom formatting - if "file_check" in grouped_errors: - markdown_report.append("---") - markdown_report.append("## 📁 Missing Files") - markdown_report.append( - "The following required files were not found. 
Please ensure they are named correctly and are located in the root directory of your project.\n") - for msg in grouped_errors.pop("file_check"): - # Attempt to extract the filename for cleaner display - try: - filename = msg.split("'")[1] - markdown_report.append(f"- ` {filename} `") - except IndexError: - markdown_report.append(f"- {msg}") - markdown_report.append("\n") - - # Handle any other error types generically - for error_type, messages in grouped_errors.items(): - markdown_report.append("---") - heading = error_type.replace('_', ' ').title() - markdown_report.append(f"## ❗ {heading}") - for msg in messages: - markdown_report.append(f"- {msg}") - markdown_report.append("\n") - - markdown_report.append("---\n") - markdown_report.append( - "**Next Steps:** Please review the errors listed above, correct your project files accordingly, and submit your work again.") - - return "\n".join(markdown_report) - - @classmethod - def create(cls, result): - """ - This class method would be responsible for creating the initial - fatal_errors.json file before generate_feedback is called. - (Implementation is beyond the scope of this example). - """ - # Example: - # report_path = os.path.join(cls.RESULTS_DIR, 'fatal_errors.json') - # with open(report_path, 'w', encoding='utf-8') as f: - # json.dump(result, f, indent=2) - pass - -if __name__ == "__main__": - # Example usage - report = FatalReporter.generate_feedback() - print(report) - # Note: In a real scenario, you would call FatalReporter.create(result) to create the initial report file. diff --git a/autograder/core/report/reporter_factory.py b/autograder/core/report/reporter_factory.py deleted file mode 100644 index 51ff60c..0000000 --- a/autograder/core/report/reporter_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -from autograder.core.models.feedback_preferences import FeedbackPreferences -from autograder.core.models.result import Result -from autograder.core.report.ai_reporter import AIReporter -from autograder.core.report.default_reporter import DefaultReporter -class Reporter: - @classmethod - def create_ai_reporter(cls, result: Result, feedback: FeedbackPreferences,template, quota): - """Creates an AIReporter instance with the students results""" - return AIReporter.create(result,feedback,template,quota) - - @classmethod - def create_default_reporter(cls, result: Result,feedback: FeedbackPreferences,template): - """Creates a DefaultReporter instance with the students results""" - return DefaultReporter.create(result,feedback,template) - - diff --git a/autograder/core/schemas/config_schemas/ai_feedback_schema.json b/autograder/core/schemas/config_schemas/ai_feedback_schema.json deleted file mode 100644 index e69de29..0000000 diff --git a/autograder/core/schemas/config_schemas/criteria_schema.json b/autograder/core/schemas/config_schemas/criteria_schema.json deleted file mode 100644 index 761e6e3..0000000 --- a/autograder/core/schemas/config_schemas/criteria_schema.json +++ /dev/null @@ -1,196 +0,0 @@ -{ - "test_library": "web_dev", - "base": { - "weight": 100, - "subjects": { - "html": { - "weight": 60, - "subjects": { - "structure": { - "weight": 40, - "tests": [ - { - "file": "index.html", - "name": "has_tag", - "calls": [ - ["body", 1], - ["header", 1], - ["nav", 1], - ["main", 1], - ["article", 4], - ["img", 5], - ["footer", 1], - ["div", 1], - ["form", 1], - ["input", 1], - ["button", 1] - ] - }, - { - "file": "index.html", - "name": "has_attribute", - "calls": [ - ["class", 2] - ] - } - ] - }, - "link": { - "weight": 20, - "tests": [ - { - 
"file": "index.html", - "name": "check_css_linked" - }, - { - "file": "index.html", - "name": "check_internal_links_to_articles", - "calls": [ - [4] - ] - } - ] - } - } - }, - "css": { - "weight": 40, - "subjects": { - "responsivity": { - "weight": 50, - "tests": [ - { - "file": "css/styles.css", - "name": "uses_relative_units" - }, - { - "file": "css/styles.css", - "name": "check_media_queries" - }, - { - "file": "css/styles.css", - "name": "check_flexbox_usage" - } - ] - }, - "style": { - "weight": 50, - "tests": [ - { - "file": "css/styles.css", - "name": "has_style", - "calls": [ - ["font-size", 1], - ["font-family", 1], - ["text-align", 1], - ["display", 1], - ["position", 1], - ["margin", 1], - ["padding", 1] - ] - } - ] - } - } - } - } - }, - "bonus": { - "weight": 40, - "subjects": { - "accessibility": { - "weight": 20, - "tests": [ - { - "file": "index.html", - "name": "check_all_images_have_alt" - } - ] - }, - "head_detail": { - "weight": 80, - "tests": [ - { - "file": "index.html", - "name": "check_head_details", - "calls": [ - ["title"], - ["meta"] - ] - }, - { - "file": "index.html", - "name": "check_attribute_and_value", - "calls": [ - ["meta", "charset", "UTF-8"], - ["meta", "name", "viewport"], - ["meta", "name", "description"], - ["meta", "name", "author"], - ["meta", "name", "keywords"] - ] - } - ] - } - } - }, - "penalty": { - "weight": 50, - "subjects": { - "html": { - "weight": 50, - "tests": [ - { - "file": "index.html", - "name": "check_bootstrap_usage" - }, - { - "file": "css/styles.css", - "name": "check_id_selector_over_usage", - "calls": [ - [2] - ] - }, - { - "file": "index.html", - "name": "has_forbidden_tag", - "calls": [ - ["script"] - ] - }, - { - "file": "index.html", - "name": "check_html_direct_children" - }, - { - "file": "index.html", - "name": "check_tag_not_inside", - "calls": [ - ["header", "main"], - ["footer", "main"] - ] - } - ] - }, - "project_structure": { - "weight": 50, - "tests": [ - { - "file": "all", - "name": "check_dir_exists", - "calls": [ - ["css"], - ["imgs"] - ] - }, - { - "file": "all", - "name": "check_project_structure", - "calls": [ - ["css/styles.css"] - ] - } - ] - } - } - } -} \ No newline at end of file diff --git a/autograder/core/schemas/config_schemas/feedback_schema.json b/autograder/core/schemas/config_schemas/feedback_schema.json deleted file mode 100644 index e69de29..0000000 diff --git a/autograder/core/utils/result_processor.py b/autograder/core/utils/result_processor.py deleted file mode 100644 index 048add2..0000000 --- a/autograder/core/utils/result_processor.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import os - - -class ResultProcessor: - # Define the project root here as well to ensure paths are consistent - _PROJECT_ROOT = os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..')) - - @staticmethod - def load_results(result_file_name: str) -> dict: - """Loads test results from a JSON file using an absolute path.""" - # Construct the absolute path from the project root - print("PROJECT ROOT:", ResultProcessor._PROJECT_ROOT) - absolute_path = os.path.join(ResultProcessor._PROJECT_ROOT, 'validation', '__tests__','results', result_file_name) - - print(f"Attempting to load results from: {absolute_path}") - try: - with open(absolute_path, "r") as f: - data = json.load(f) - # data is a list of test result dicts - passed_tests = [test for test in data if test.get("status") == "passed"] - failed_tests = [test for test in data if test.get("status") == "failed"] - quantitative_results = {} # Not present in 
this format - return passed_tests, failed_tests, quantitative_results - except FileNotFoundError: - print( - f"ERROR: File not found at {absolute_path}. This indicates a race condition or a file naming mismatch.") - raise - except json.JSONDecodeError: - print(f"ERROR: Could not decode JSON from {absolute_path}. The file might be empty or malformed.") - raise \ No newline at end of file diff --git a/autograder/builder/__init__.py b/autograder/models/__init__.py similarity index 100% rename from autograder/builder/__init__.py rename to autograder/models/__init__.py diff --git a/autograder/builder/execution_helpers/__init__.py b/autograder/models/abstract/__init__.py similarity index 100% rename from autograder/builder/execution_helpers/__init__.py rename to autograder/models/abstract/__init__.py diff --git a/autograder/models/abstract/step.py b/autograder/models/abstract/step.py new file mode 100644 index 0000000..eef6a9b --- /dev/null +++ b/autograder/models/abstract/step.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod +from typing import Any + +from autograder.models.dataclass.step_result import StepResult + + +class Step(ABC): + @abstractmethod + def execute(self, input: Any) -> StepResult[Any]: + pass + diff --git a/autograder/builder/models/template.py b/autograder/models/abstract/template.py similarity index 81% rename from autograder/builder/models/template.py rename to autograder/models/abstract/template.py index 1ac30b1..a0962ed 100644 --- a/autograder/builder/models/template.py +++ b/autograder/models/abstract/template.py @@ -1,13 +1,17 @@ from abc import ABC, abstractmethod -class Template(ABC): +from autograder.models.abstract.test_function import TestFunction + +class Template(ABC): def __init__(self): self.tests = None + @property @abstractmethod def template_name(self) -> str: pass + @property @abstractmethod def template_description(self) -> str: @@ -28,14 +32,14 @@ def requires_execution_helper(self) -> bool: def execution_helper(self): pass + @abstractmethod + def get_test(self, name: str) -> TestFunction: + pass + @abstractmethod def stop(self): pass + def get_tests(self): return self.tests - - - - - \ No newline at end of file diff --git a/autograder/builder/models/test_function.py b/autograder/models/abstract/test_function.py similarity index 88% rename from autograder/builder/models/test_function.py rename to autograder/models/abstract/test_function.py index 1a15167..31c6503 100644 --- a/autograder/builder/models/test_function.py +++ b/autograder/models/abstract/test_function.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Dict, List, Optional -from autograder.builder.models.criteria_tree import TestResult -from autograder.builder.models.param_description import ParamDescription +from autograder.models.dataclass.test_result import TestResult +from autograder.models.dataclass.param_description import ParamDescription class TestFunction(ABC): diff --git a/autograder/builder/models/__init__.py b/autograder/models/config/__init__.py similarity index 100% rename from autograder/builder/models/__init__.py rename to autograder/models/config/__init__.py diff --git a/autograder/models/config/category.py b/autograder/models/config/category.py new file mode 100644 index 0000000..3c25f73 --- /dev/null +++ b/autograder/models/config/category.py @@ -0,0 +1,40 @@ +from typing import List, Optional + +from autograder.models.config.subject import SubjectConfig +from .test import TestConfig +from pydantic import BaseModel, Field, model_validator + + 
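# Usage sketch (illustration only, not part of this module): the Pydantic config
# models added in this diff are meant to be driven through CriteriaConfig.from_dict(),
# which nests CategoryConfig (defined just below), SubjectConfig, TestConfig and
# ParameterConfig. The sample dictionary is hypothetical:
#
#   from pydantic import ValidationError
#   from autograder.models.config.criteria import CriteriaConfig
#
#   criteria = CriteriaConfig.from_dict({
#       "test_library": "web_dev",
#       "base": {
#           "weight": 100,
#           "tests": [
#               {"name": "has_tag", "file": "index.html",
#                "parameters": [{"name": "tag", "value": "body"}]},
#           ],
#       },
#   })
#   assert criteria.base.tests[0].get_kwargs_dict() == {"tag": "body"}
#
#   # A category with neither tests nor subjects is rejected by the validator below:
#   try:
#       CriteriaConfig.from_dict({"base": {"weight": 100}})
#   except ValidationError:
#       pass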
+class CategoryConfig(BaseModel): + weight: float = Field( + ..., ge=0, le=100, description="Weight of this category (0-100)" + ) + tests: Optional[List[TestConfig]] = Field( + None, description="Tests under this subject" + ) + subjects: Optional[List[SubjectConfig]] = Field(None, description="Nested subjects") + subjects_weight: Optional[int] = Field( + None, + ge=0, + le=100, + description="Weight of the subject when it is a heterogeneous tree", + ) + + model_config = {"extra": "forbid"} + + @model_validator(mode="after") + def check_subjects_and_tests(self) -> "CategoryConfig": + """Validate that category has at least tests or subjects.""" + has_tests = self.tests is not None and len(self.tests) > 0 + has_subjects = self.subjects is not None and len(self.subjects) > 0 + has_subject_weight = self.subjects_weight is not None + + if not has_tests and not has_subjects: + raise ValueError("Category must have at least 'tests' or 'subjects'.") + + if has_tests and has_subjects and not has_subject_weight: + raise ValueError( + "Category needs 'subjects_weight' defined when has tests and subjects" + ) + + return self diff --git a/autograder/models/config/criteria.py b/autograder/models/config/criteria.py new file mode 100644 index 0000000..fd2780a --- /dev/null +++ b/autograder/models/config/criteria.py @@ -0,0 +1,27 @@ +from typing import Optional +from pydantic import BaseModel, Field + +from autograder.models.config.category import CategoryConfig + + +class CriteriaConfig(BaseModel): + """Root configuration for grading criteria.""" + + test_library: Optional[str] = Field( + None, description="Name of the test library/template to use" + ) + base: CategoryConfig = Field(..., description="Base grading criteria (required)") + bonus: Optional[CategoryConfig] = Field(None, description="Bonus points criteria") + penalty: Optional[CategoryConfig] = Field(None, description="Penalty criteria") + + model_config = {"extra": "forbid"} + + @classmethod + def from_dict(cls, data: dict) -> "CriteriaConfig": + """Create and validate criteria config from dictionary.""" + return cls.model_validate(data) + + @classmethod + def from_json(cls, json_str: str) -> "CriteriaConfig": + """Create and validate criteria config from JSON string.""" + return cls.model_validate_json(json_str) diff --git a/autograder/models/config/subject.py b/autograder/models/config/subject.py new file mode 100644 index 0000000..6910137 --- /dev/null +++ b/autograder/models/config/subject.py @@ -0,0 +1,41 @@ +from typing import List, Optional +from .test import TestConfig +from pydantic import BaseModel, Field, model_validator + + +class SubjectConfig(BaseModel): + subject_name: str = Field(..., description="Name of the subject") + weight: float = Field( + ..., ge=0, le=100, description="Weight of this subject (0-100)" + ) + tests: Optional[List[TestConfig]] = Field( + None, description="Tests under this subject" + ) + subjects: Optional[List["SubjectConfig"]] = Field( + None, description="Nested subjects" + ) + subjects_weight: Optional[int] = Field( + None, + ge=0, + le=100, + description="Weight of the subject when it is a heterogeneous tree", + ) + + model_config = {"extra": "forbid"} + + @model_validator(mode="after") + def check_subjects_and_tests(self) -> "SubjectConfig": + """Validate that category has at least tests or subjects.""" + has_tests = self.tests is not None and len(self.tests) > 0 + has_subjects = self.subjects is not None and len(self.subjects) > 0 + has_subject_weight = self.subjects_weight is not None + + if not 
has_tests and not has_subjects: + raise ValueError("Subject must have at least 'tests' or 'subjects'.") + + if has_tests and has_subjects and not has_subject_weight: + raise ValueError( + "Subject needs 'subjects_weight' defined when has tests and subjects" + ) + + return self diff --git a/autograder/models/config/test.py b/autograder/models/config/test.py new file mode 100644 index 0000000..9af903e --- /dev/null +++ b/autograder/models/config/test.py @@ -0,0 +1,37 @@ +from typing import Any, Dict, List, Optional +from pydantic import BaseModel, Field + + +class ParameterConfig(BaseModel): + """Named parameter for a test function.""" + + name: str = Field(..., description="Parameter name") + value: Any = Field(..., description="Parameter value") + + model_config = {"extra": "forbid"} + + +class TestConfig(BaseModel): + """Configuration for a single test execution.""" + + name: str = Field(..., description="Name of the test function in the template") + file: Optional[str] = Field( + None, description="Target file for the test (if applicable)" + ) + parameters: Optional[List[ParameterConfig]] = Field( + None, description="Named parameters for the test function" + ) + + model_config = {"extra": "forbid"} + + def get_args_list(self) -> List[Any]: + """Convert named parameters to positional arguments list.""" + if not self.parameters: + return [] + return [param.value for param in self.parameters] + + def get_kwargs_dict(self) -> Dict[str, Any]: + """Convert named parameters to keyword arguments dictionary.""" + if not self.parameters: + return {} + return {param.name: param.value for param in self.parameters} diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py new file mode 100644 index 0000000..fb6f659 --- /dev/null +++ b/autograder/models/criteria_tree.py @@ -0,0 +1,135 @@ +""" +Updated Criteria Tree models with embedded test functions. + +These models represent the grading criteria structure with test functions +embedded during tree building (no more lazy loading or pre-execution). +""" + +from typing import List, Optional, Any +from dataclasses import dataclass, field + +from autograder.utils.printers.criteria_tree import CriteriaTreePrinter + + +@dataclass +class TestNode: + """ + Leaf node representing a single test execution configuration. + + Contains: + - Test function reference (from template) + - Parameters for execution + - File target (if applicable) + - Category and subject context + """ + + name: str + test_name: str + test_function: Any # TestFunction instance from template + parameters: List[Any] = field(default_factory=list) + file_target: Optional[str] = None + weight: float = 100.0 + + def __repr__(self): + params_str = f", params={self.parameters}" if self.parameters else "" + file_str = f", file={self.file_target}" if self.file_target else "" + return f"TestNode({self.test_name}{params_str}{file_str})" + + +@dataclass +class SubjectNode: + """ + Branch node representing a subject/topic in the grading criteria. 
+ + Can contain either: + - Nested subjects (recursive structure) + - Test nodes (leaf level) + """ + + name: str + weight: float + subjects: List["SubjectNode"] = field(default_factory=list) + tests: List[TestNode] = field(default_factory=list) + subjects_weight: Optional[float] = None + + def __repr__(self): + if self.subjects: + return f"SubjectNode({self.name}, weight={self.weight}, subjects={len(self.subjects)})" + return ( + f"SubjectNode({self.name}, weight={self.weight}, tests={len(self.tests)})" + ) + + def get_all_tests(self) -> List[TestNode]: + tests = [*self.tests] + for subject in self.subjects: + tests.extend(subject.get_all_tests()) + return tests + + +@dataclass +class CategoryNode: + """ + Top-level category node (base, bonus, or penalty). + + Can contain either: + - Subjects (organized hierarchy) + - Tests (flat structure) + """ + + name: str + weight: float + subjects: List[SubjectNode] = field(default_factory=list) + tests: List[TestNode] = field(default_factory=list) + subjects_weight: Optional[float] = None + + def __repr__(self): + if self.subjects: + return f"CategoryNode({self.name}, weight={self.weight}, subjects={len(self.subjects)})" + return ( + f"CategoryNode({self.name}, weight={self.weight}, tests={len(self.tests)})" + ) + + def add_subjects(self, subjects: List[SubjectNode]) -> None: + self.subjects.extend(subjects) + + def get_all_tests(self) -> List[TestNode]: + """Recursively collect all test nodes under this category.""" + tests = [] + + if self.tests: + tests.extend(self.tests) + + if self.subjects: + for subject in self.subjects: + tests.extend(subject.get_all_tests()) + + return tests + + +@dataclass +class CriteriaTree: + """ + Root of the criteria tree structure. + + Contains three main categories: + - base: Required grading criteria + - bonus: Optional bonus points + - penalty: Optional penalty points + """ + + base: CategoryNode + bonus: Optional[CategoryNode] = None + penalty: Optional[CategoryNode] = None + + def __repr__(self): + categories = ["base"] + if self.bonus: + categories.append("bonus") + if self.penalty: + categories.append("penalty") + return f"CriteriaTree(categories={categories})" + + def print_tree(self): + """Prints a visual representation of the entire criteria tree.""" + printer = CriteriaTreePrinter() + printer.print_tree(self) diff --git a/autograder/builder/template_library/__init__.py b/autograder/models/dataclass/__init__.py similarity index 100% rename from autograder/builder/template_library/__init__.py rename to autograder/models/dataclass/__init__.py diff --git a/autograder/core/models/autograder_response.py b/autograder/models/dataclass/autograder_response.py similarity index 54% rename from autograder/core/models/autograder_response.py rename to autograder/models/dataclass/autograder_response.py index 5f65303..d1eb3f6 100644 --- a/autograder/core/models/autograder_response.py +++ b/autograder/models/dataclass/autograder_response.py @@ -1,19 +1,18 @@ -from typing import List, Optional +from dataclasses import dataclass, field +from typing import List -from autograder.builder.models.test_function import TestFunction -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult -from pydantic import BaseModel, Field - -class AutograderResponse(BaseModel): +@dataclass +class AutograderResponse: """ Represents the response from the autograder. 
""" status: str final_score: float = 0.0 feedback: str = "" - test_report: List[TestResult] = Field(default_factory=list) + test_report: List[TestResult] = field(default_factory=list) def __repr__(self) -> str: feedback_size = len(self.feedback) if self.feedback else 0 diff --git a/autograder/models/dataclass/feedback_preferences.py b/autograder/models/dataclass/feedback_preferences.py new file mode 100644 index 0000000..938815c --- /dev/null +++ b/autograder/models/dataclass/feedback_preferences.py @@ -0,0 +1,62 @@ +from typing import List, Dict +from dataclasses import dataclass, field + + +@dataclass +class LearningResource: + """Represents a single online resource linked to specific test names.""" + url: str + description: str + linked_tests: List[str] + + def __repr__(self) -> str: + return f"LearningResource(url='{self.url}', tests={self.linked_tests})" + + +@dataclass +class GeneralPreferences: + """Preferences applicable to both Default and AI reporters.""" + report_title: str = "Relatório de Avaliação" + show_score: bool = True + show_passed_tests: bool = False + add_report_summary: bool = True + online_content: List[LearningResource] = field(default_factory=list) + + +@dataclass +class AiReporterPreferences: + """Preferences specific to the AI Reporter.""" + provide_solutions: str = "hint" + feedback_tone: str = "encouraging but direct" + feedback_persona: str = "Code Buddy" + assignment_context: str = "" + extra_orientations: str = "" + submission_files_to_read: List[str] = field(default_factory=list) + + +def _default_category_headers() -> Dict[str, str]: + """Factory function for default category headers.""" + return { + "base": "✅ Requisitos Essenciais", + "bonus": "⭐ Pontos Extras", + "penalty": "❌ Pontos a Melhorar" + } + + +@dataclass +class DefaultReporterPreferences: + """Preferences specific to the Default (template-based) Reporter.""" + category_headers: Dict[str, str] = field(default_factory=_default_category_headers) + + +@dataclass +class FeedbackPreferences: + """ + A unified model to store all feedback preferences, including the new + test-linked learning resources and legacy AI configurations. 
+ """ + general: GeneralPreferences = field(default_factory=GeneralPreferences) + ai: AiReporterPreferences = field(default_factory=AiReporterPreferences) + default: DefaultReporterPreferences = field(default_factory=DefaultReporterPreferences) + + diff --git a/autograder/models/dataclass/grading_result.py b/autograder/models/dataclass/grading_result.py new file mode 100644 index 0000000..256ceea --- /dev/null +++ b/autograder/models/dataclass/grading_result.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from typing import Optional +from autograder.models.result_tree import ResultTree + + +@dataclass +class GradingResult: + final_score: float + status: str + feedback: Optional[str] = None + result_tree: Optional['ResultTree'] = None + # In case of error + error: Optional[str] = None + failed_at_step: Optional[str] = None diff --git a/autograder/builder/models/param_description.py b/autograder/models/dataclass/param_description.py similarity index 100% rename from autograder/builder/models/param_description.py rename to autograder/models/dataclass/param_description.py diff --git a/autograder/models/dataclass/pipeline_execution.py b/autograder/models/dataclass/pipeline_execution.py new file mode 100644 index 0000000..8622eef --- /dev/null +++ b/autograder/models/dataclass/pipeline_execution.py @@ -0,0 +1,33 @@ +from dataclasses import dataclass +from typing import List + +from autograder.models.dataclass.step_result import StepResult, StepName +from autograder.models.dataclass.submission import Submission + + +@dataclass +class PipelineExecution: + """ + Main object of the autograder pipeline, keeps track of the execution and step results. + + Attributes: + step_results (list): A list of StepResult objects representing the results of each step in the pipeline. + assignment_id (str): The unique identifier for the assignment being graded. + submission (Submission): The submission being processed in the pipeline. + """ + step_results: List[StepResult] + assignment_id: str + submission: Submission + + def add_step_result(self, step_result: StepResult) -> 'PipelineExecution': + self.step_results.append(step_result) + return self + + def get_step_result(self, step_name: StepName) -> StepResult: + for step_result in self.step_results: + if step_result.step == step_name: + return step_result + raise ValueError(f"Step {step_name} was not executed in the pipeline.") + + def get_previous_step(self): + return self.step_results[-1] if self.step_results else None diff --git a/autograder/models/dataclass/preflight_error.py b/autograder/models/dataclass/preflight_error.py new file mode 100644 index 0000000..7ddb370 --- /dev/null +++ b/autograder/models/dataclass/preflight_error.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +class PreflightCheckType(Enum): + """Types of preflight checks that can fail.""" + FILE_CHECK = "file_check" + SETUP_COMMAND = "setup_command" + + +@dataclass +class PreflightError: + """ + Represents an error found during pre-flight checks. 
+ + Attributes: + type: The type of error (file check or setup command) + message: The error message describing what went wrong + details: Optional additional context about the error + """ + type: PreflightCheckType + message: str + details: Optional[dict] = None diff --git a/autograder/core/models/result.py b/autograder/models/dataclass/result.py similarity index 73% rename from autograder/core/models/result.py rename to autograder/models/dataclass/result.py index 8cee9f0..e389dfd 100644 --- a/autograder/core/models/result.py +++ b/autograder/models/dataclass/result.py @@ -1,9 +1,10 @@ from typing import List, Dict -from autograder.core.models.test_result import TestResult +from dataclasses import dataclass, field +from autograder.models.dataclass.test_result import TestResult -from pydantic import BaseModel, Field -class Result(BaseModel): +@dataclass +class Result: """ Represents the result of an assignment submission. Contains a final score generated by the Scorer class and the Graders. @@ -12,12 +13,10 @@ class Result(BaseModel): final_score: float author: str - submission_files: Dict[str,str] = Field(default_factory=dict, alias="submission_files") - base_results: List[TestResult] = Field(default_factory=list) - bonus_results: List[TestResult] = Field(default_factory=list) - penalty_results: List[TestResult] = Field(default_factory=list) - - model_config = {"populate_by_name": True} + submission_files: Dict[str, str] = field(default_factory=dict) + base_results: List[TestResult] = field(default_factory=list) + bonus_results: List[TestResult] = field(default_factory=list) + penalty_results: List[TestResult] = field(default_factory=list) def get_test_report(self) -> List[TestResult]: return self.base_results + self.bonus_results + self.penalty_results diff --git a/autograder/models/dataclass/step_result.py b/autograder/models/dataclass/step_result.py new file mode 100644 index 0000000..dfdf9c1 --- /dev/null +++ b/autograder/models/dataclass/step_result.py @@ -0,0 +1,33 @@ +from dataclasses import dataclass +from typing import Any, Optional, TypeVar, Generic +from enum import Enum + +T = TypeVar('T') + + +class StepStatus(Enum): + SUCCESS = "success" + FAIL = "fail" + + +class StepName(Enum): + BOOTSTRAP = "BootstrapStep" + LOAD_TEMPLATE = "LoadTemplateStep" + BUILD_TREE = "BuildTreeStep" + PRE_FLIGHT = "PreFlightStep" + GRADE = "GradeStep" + FEEDBACK = "FeedbackStep" + EXPORTER = "ExporterStep" + + +@dataclass +class StepResult(Generic[T]): + step: StepName + data: T + status: StepStatus = StepStatus.SUCCESS + error: Optional[str] = None + original_input: Any = None + + @property + def is_successful(self) -> bool: + return self.status == StepStatus.SUCCESS and self.error is None diff --git a/autograder/models/dataclass/submission.py b/autograder/models/dataclass/submission.py new file mode 100644 index 0000000..98ecd87 --- /dev/null +++ b/autograder/models/dataclass/submission.py @@ -0,0 +1,15 @@ +from typing import List + +from dataclasses import dataclass + +@dataclass +class SubmissionFile: + filename: str + content: str + +@dataclass +class Submission: + username: str + user_id: int + assignment_id: int + submission_files: List[SubmissionFile] diff --git a/autograder/core/models/test_result.py b/autograder/models/dataclass/test_result.py similarity index 77% rename from autograder/core/models/test_result.py rename to autograder/models/dataclass/test_result.py index 59d5b76..70bc93e 100644 --- a/autograder/core/models/test_result.py +++ b/autograder/models/dataclass/test_result.py @@ 
-1,14 +1,16 @@ -from pydantic import BaseModel, Field -from typing import Dict, Any +from dataclasses import dataclass, field +from typing import Dict, Any, Optional -class TestResult(BaseModel): + +@dataclass +class TestResult: """Stores the outcome of a single test execution from the test library.""" test_name: str score: int report: str subject_name: str = "" - parameters: Dict[str, Any] = Field(default_factory=dict) + parameters: Optional[Dict[str, Any]] = field(default_factory=dict) def get_result(self, *args, **kwargs) : return [self] diff --git a/autograder/models/result_tree.py b/autograder/models/result_tree.py new file mode 100644 index 0000000..aa18b1e --- /dev/null +++ b/autograder/models/result_tree.py @@ -0,0 +1,385 @@ +""" +Models for the Result Tree - represents executed grading results. +The result tree mirrors the criteria structure but contains actual execution results. +""" +from dataclasses import dataclass, field +from typing import List, Dict, Optional, Any +from enum import Enum + + +class NodeType(Enum): + """Types of nodes in the result tree.""" + CATEGORY = "category" + SUBJECT = "subject" + TEST = "test" + + +@dataclass +class ResultNode: + """ + Base node for the result tree. + + Represents a grading category or subject with a calculated score + based on its children's scores and weights. + """ + name: str + node_type: NodeType + weight: float + score: float = 0.0 + max_possible: float = 100.0 + children: List['ResultNode'] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + def calculate_score(self) -> float: + """ + Calculate this node's score based on children. + For leaf nodes (tests), score is already set. + For parent nodes, calculate weighted average of children. + + Special case: ROOT node with BASE/BONUS/PENALTY uses additive scoring: + - Base score (0-100) + - Bonus adds points (bonus_score * bonus_weight / 100) + - Penalty subtracts points (penalty_score * penalty_weight / 100) + """ + if self.node_type == NodeType.TEST: + # Leaf node - score already set from test execution + return self.score + + if not self.children: + return 0.0 + + # Calculate children scores first + for child in self.children: + child.calculate_score() + + # Check if this is a ROOT node with BASE/BONUS/PENALTY categories + child_names = {c.name.lower() for c in self.children} + is_root_with_categories = ( + self.name.lower() == "root" and + "base" in child_names + ) + + if is_root_with_categories: + # Additive scoring for BASE/BONUS/PENALTY + base_score = 0.0 + bonus_points = 0.0 + penalty_points = 0.0 + + for child in self.children: + child_name = child.name.lower() + if child_name == "base": + base_score = child.score + elif child_name == "bonus": + # Bonus adds: (bonus_score / 100) * bonus_weight + bonus_points = (child.score / 100.0) * child.weight + elif child_name == "penalty": + # Penalty subtracts: (penalty_score / 100) * penalty_weight + penalty_points = (child.score / 100.0) * child.weight + + # Final score = base + bonus - penalty (capped at 0-100) + self.score = max(0.0, min(100.0, base_score + bonus_points - penalty_points)) + else: + # Standard weighted average for other nodes + total_weight = sum(c.weight for c in self.children) + if total_weight == 0: + return 0.0 + + weighted_sum = sum(c.score * c.weight for c in self.children) + self.score = weighted_sum / total_weight + + return self.score + + def to_dict(self) -> dict: + """Convert result node to dictionary representation.""" + return { + "name": self.name, + "type": 
self.node_type.value, + "weight": self.weight, + "score": round(self.score, 2), + "max_possible": self.max_possible, + "children": [child.to_dict() for child in self.children], + "metadata": self.metadata + } + + +@dataclass +class TestResultNode(ResultNode): + """ + Leaf node representing a single test execution. + + Contains the actual test result and execution details. + """ + test_name: str = "" + test_function: Any = None # Reference to the actual test function + test_params: List[Any] = field(default_factory=list) + file_target: Optional[str] = None + execution_result: Optional[Any] = None # TestResult object after execution + error_message: Optional[str] = None + passed: bool = False + + def __post_init__(self): + """Set node type to TEST.""" + self.node_type = NodeType.TEST + + def execute(self, submission_files: Dict[str, Any]) -> float: + """ + Execute the test function with provided parameters. + Updates score, passed status, and execution_result. + + Returns: + The test score (0-100) + """ + if not self.test_function: + self.error_message = "No test function assigned" + self.score = 0.0 + self.passed = False + return 0.0 + + try: + # Execute the test function + # The test function should return a TestResult object + self.execution_result = self.test_function.execute( + *self.test_params, + files=submission_files + ) + + # Extract score from result + if hasattr(self.execution_result, 'score'): + self.score = float(self.execution_result.score) + else: + self.score = 100.0 if self.execution_result else 0.0 + + # Check if test passed (score >= 50 is considered passing) + self.passed = self.score >= 50 + + # Store result report/message + if hasattr(self.execution_result, 'report'): + self.metadata['report'] = self.execution_result.report + elif hasattr(self.execution_result, 'message'): + self.metadata['message'] = self.execution_result.message + + return self.score + + except Exception as e: + self.error_message = f"Test execution failed: {str(e)}" + self.score = 0.0 + self.passed = False + self.metadata['error'] = str(e) + return 0.0 + + def to_dict(self) -> dict: + """Convert test result node to dictionary with execution details.""" + base_dict = super().to_dict() + base_dict.update({ + "test_name": self.test_name, + "file_target": self.file_target, + "passed": self.passed, + "error_message": self.error_message, + "params": self.test_params + }) + return base_dict + + +@dataclass +class ResultTree: + """ + Complete result tree for a grading session. + + Contains the root node and provides methods for score calculation + and tree traversal. + """ + root: ResultNode + template_name: Optional[str] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + def calculate_final_score(self) -> float: + """ + Calculate and return the final score by traversing the tree. 
+ + Returns: + Final score (0-100) + """ + return self.root.calculate_score() + + def get_all_test_results(self) -> List[TestResultNode]: + """Get all test result nodes from the tree.""" + results = [] + self._collect_tests(self.root, results) + return results + + def _collect_tests(self, node: ResultNode, collector: List[TestResultNode]): + """Recursively collect all test nodes.""" + if isinstance(node, TestResultNode): + collector.append(node) + else: + for child in node.children: + self._collect_tests(child, collector) + + def get_failed_tests(self) -> List[TestResultNode]: + """Get all failed test nodes.""" + return [test for test in self.get_all_test_results() if not test.passed] + + def get_passed_tests(self) -> List[TestResultNode]: + """Get all passed test nodes.""" + return [test for test in self.get_all_test_results() if test.passed] + + def to_dict(self) -> dict: + """Convert entire result tree to dictionary.""" + return { + "template_name": self.template_name, + "final_score": round(self.root.score, 2), + "tree": self.root.to_dict(), + "metadata": self.metadata, + "summary": { + "total_tests": len(self.get_all_test_results()), + "passed_tests": len(self.get_passed_tests()), + "failed_tests": len(self.get_failed_tests()) + } + } + + def print_tree(self, show_details: bool = True): + """ + Print a visual representation of the result tree. + + Args: + show_details: If True, show test parameters and error messages + """ + print("\n" + "=" * 70) + print("🎯 RESULT TREE") + print("=" * 70) + + # Print header info + if self.template_name: + print(f"📋 Template: {self.template_name}") + + print(f"🏆 Final Score: {self.root.score:.2f}/100") + + summary = { + "total": len(self.get_all_test_results()), + "passed": len(self.get_passed_tests()), + "failed": len(self.get_failed_tests()) + } + print(f"📊 Tests: {summary['total']} total | " + f"✅ {summary['passed']} passed | " + f"❌ {summary['failed']} failed") + + print("\n" + "-" * 70) + + # Print tree structure + self._print_node(self.root, "", show_details) + + print("=" * 70 + "\n") + + def _print_node(self, node: ResultNode, prefix: str, show_details: bool): + """Recursively print a node and its children.""" + if isinstance(node, TestResultNode): + self._print_test_node(node, prefix, show_details) + else: + self._print_parent_node(node, prefix, show_details) + + def _print_parent_node(self, node: ResultNode, prefix: str, show_details: bool): + """Print a category or subject node.""" + # Choose icon based on node type + if node.node_type == NodeType.CATEGORY: + icon = "📁" + name = node.name.upper() + else: # SUBJECT + icon = "📘" + name = node.name + + # Color code score + score_str = f"{node.score:.1f}" + if node.score >= 80: + score_color = "🟢" + elif node.score >= 60: + score_color = "🟡" + else: + score_color = "🔴" + + print(f"{prefix}{icon} {name} " + f"[weight: {node.weight:.0f}%] " + f"{score_color} {score_str}/100") + + # Print children + for child in node.children: + self._print_node(child, prefix + " ", show_details) + + def _print_test_node(self, node: TestResultNode, prefix: str, show_details: bool): + """Print a test result node.""" + # Status icon + status = "✅" if node.passed else "❌" + + # Score with color + if node.score >= 80: + score_color = "🟢" + elif node.score >= 60: + score_color = "🟡" + else: + score_color = "🔴" + + # Basic test info + test_info = f"{prefix}🧪 {node.test_name} {status}" + + # Add file target if present + if node.file_target: + test_info += f" [file: {node.file_target}]" + + # Add score + test_info += f" 
{score_color} {node.score:.1f}/100"
+
+        print(test_info)
+
+        # Show details if requested
+        if show_details:
+            # Show parameters
+            if node.test_params:
+                params_str = ", ".join(str(p) for p in node.test_params)
+                print(f"{prefix}    ⚙️ params: ({params_str})")
+
+            # Show error message if failed
+            if node.error_message:
+                print(f"{prefix}    ⚠️ error: {node.error_message}")
+
+            # Show metadata report/message
+            if 'report' in node.metadata:
+                report = node.metadata['report']
+                # Truncate long reports
+                if len(report) > 80:
+                    report = report[:77] + "..."
+                print(f"{prefix}    💬 {report}")
+
+    def print_summary(self):
+        """Print a compact summary of the results."""
+        print("\n" + "=" * 70)
+        print("📊 GRADING SUMMARY")
+        print("=" * 70)
+
+        print(f"\n🏆 Final Score: {self.root.score:.2f}/100")
+
+        # Test statistics
+        all_tests = self.get_all_test_results()
+        passed = self.get_passed_tests()
+        failed = self.get_failed_tests()
+
+        print(f"\n📈 Test Results:")
+        print(f"   Total: {len(all_tests)}")
+        if all_tests:
+            # Guard against division by zero when the tree contains no tests
+            print(f"   ✅ Passed: {len(passed)} ({len(passed)/len(all_tests)*100:.1f}%)")
+            print(f"   ❌ Failed: {len(failed)} ({len(failed)/len(all_tests)*100:.1f}%)")
+
+            # Score distribution
+            avg_score = sum(t.score for t in all_tests) / len(all_tests)
+            print(f"\n📊 Average Test Score: {avg_score:.2f}")
+
+        # Show failed tests if any
+        if failed:
+            print(f"\n❌ Failed Tests:")
+            for test in failed:
+                print(f"   • {test.test_name}: {test.score:.1f}/100")
+                if test.error_message:
+                    print(f"     Error: {test.error_message}")
+
+        print("=" * 70 + "\n")
+
+
diff --git a/autograder/pipeline.py b/autograder/pipeline.py
new file mode 100644
index 0000000..2edde5f
--- /dev/null
+++ b/autograder/pipeline.py
@@ -0,0 +1,50 @@
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+
+
+class AutograderPipeline:
+    def __init__(self):
+        self._steps = []
+
+    def add_step(self, step: Step) -> None:
+        self._steps.append(step)
+
+    def run(self, input_data: 'Submission'):
+        # Initialize the result object with the input data
+        result = StepResult(
+            step=StepName.BOOTSTRAP,
+            data=input_data,
+            status=StepStatus.SUCCESS)
+        pipeline_execution = PipelineExecution(step_results=[], assignment_id="assignment_123", submission=input_data)  # Example assignment_id
+        pipeline_execution.add_step_result(result)
+
+        for step in self._steps:
+            print("Executing step:", step.__class__.__name__)
+            if not result.is_successful:
+                # Stop executing further steps once a step has failed
+                break
+            try:
+                result = step.execute(result)
+            except Exception as e:
+                # Record the failure so the loop stops and the error is reported below
+                result = StepResult(
+                    step=step.__class__.__name__,
+                    data=None,
+                    status=StepStatus.FAIL,
+                    error=str(e),
+                )
+
+        if not result.is_successful:  # Change this to report a PipelineExecution error with result details
+            return GradingResult(  # Maybe return an ErrorResponse object?
+                final_score=0.0,
+                status="error",
+                feedback=None,
+                result_tree=None,
+                error=result.error,
+                failed_at_step=result.failed_at_step,
+            )
+        else:
+            return result.get_step_result(StepName.GRADE).data  # How to return with feedback? How to know when there's no feedback?
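The run() loop above threads one result object through every registered step and short-circuits as soon as a step fails. A minimal, self-contained sketch of that contract follows; the MiniStep/MiniStepResult names are illustrative stand-ins for the real Step/StepResult classes, not the project's API:

from dataclasses import dataclass
from typing import Any, List, Optional


@dataclass
class MiniStepResult:
    step: str
    data: Any
    ok: bool
    error: Optional[str] = None


class MiniStep:
    def execute(self, result: MiniStepResult) -> MiniStepResult:
        raise NotImplementedError


class DoubleScoreStep(MiniStep):
    def execute(self, result: MiniStepResult) -> MiniStepResult:
        return MiniStepResult(step="double", data=result.data * 2, ok=True)


class FailingStep(MiniStep):
    def execute(self, result: MiniStepResult) -> MiniStepResult:
        raise ValueError("boom")


def run_pipeline(steps: List[MiniStep], initial: Any) -> MiniStepResult:
    # Seed the chain with a successful bootstrap result, as AutograderPipeline.run does
    result = MiniStepResult(step="bootstrap", data=initial, ok=True)
    for step in steps:
        if not result.ok:
            break  # a failed step short-circuits the rest of the pipeline
        try:
            result = step.execute(result)
        except Exception as exc:
            result = MiniStepResult(step=step.__class__.__name__, data=None, ok=False, error=str(exc))
    return result


if __name__ == "__main__":
    print(run_pipeline([DoubleScoreStep(), FailingStep(), DoubleScoreStep()], 21))
    # -> MiniStepResult(step='FailingStep', data=None, ok=False, error='boom')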
+ + + diff --git a/autograder/builder/template_library/templates/__init__.py b/autograder/services/__init__.py similarity index 100% rename from autograder/builder/template_library/templates/__init__.py rename to autograder/services/__init__.py diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py new file mode 100644 index 0000000..5374414 --- /dev/null +++ b/autograder/services/criteria_tree_service.py @@ -0,0 +1,47 @@ +import logging + +from autograder.models.abstract.template import Template +from autograder.models.config.criteria import CriteriaConfig +from autograder.models.criteria_tree import CriteriaTree +from autograder.services.parsers.criteria_tree import CriteriaTreeParser + + +class CriteriaTreeService: + """ + Service for building criteria trees from validated configuration. + + The tree building process now: + 1. Validates criteria config using Pydantic models + 2. Matches test functions from template during building + 3. Embeds test functions and parameters directly in TestNodes + 4. Balances weights across siblings + + This eliminates the need for pre-executed trees and improves error handling. + """ + + def __init__(self): + self.logger = logging.getLogger("CriteriaTreeService") + + def build_tree( + self, criteria_config: CriteriaConfig, template: Template + ) -> CriteriaTree: + """ + Build a complete criteria tree from validated configuration. + + Args: + criteria_config: Validated criteria configuration + template: Template containing test functions + + Returns: + Complete CriteriaTree with embedded test functions + + Raises: + ValueError: If test function not found in template + """ + self.logger.info("Building criteria tree") + + parser = CriteriaTreeParser(template) + tree = parser.parse_tree(criteria_config) + + self.logger.info("Criteria tree built successfully") + return tree diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py new file mode 100644 index 0000000..a440ce8 --- /dev/null +++ b/autograder/services/grader_service.py @@ -0,0 +1,21 @@ +import logging + +from typing import Dict, Any, Optional +from autograder.models.criteria_tree import CriteriaTree +from autograder.models.result_tree import ResultTree +from autograder.services.criteria_tree_service import CriteriaTreeService +from autograder.services.graders.criteria_tree import CriteriaTreeGrader + + +class GraderService: + def __init__(self): + self.logger = logging.getLogger("GraderService") + self._criteria_service = CriteriaTreeService() + + def grade_from_tree( + self, + criteria_tree: CriteriaTree, + submission_files: Dict[str, Any], + ) -> ResultTree: + grader = CriteriaTreeGrader(submission_files) + return grader.grade(criteria_tree) diff --git a/autograder/core/__init__.py b/autograder/services/graders/__init__.py similarity index 100% rename from autograder/core/__init__.py rename to autograder/services/graders/__init__.py diff --git a/autograder/services/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py new file mode 100644 index 0000000..effa77f --- /dev/null +++ b/autograder/services/graders/criteria_tree.py @@ -0,0 +1,134 @@ +import logging +from typing import Dict, Optional, Sequence +from typing_extensions import override +from autograder.models.criteria_tree import ( + CategoryNode, + CriteriaTree, + SubjectNode, + TestNode, +) +from autograder.models.result_tree import ( + NodeType, + ResultNode, + ResultTree, + TestResultNode, +) +from 
autograder.utils.processers.criteria_tree import CriteriaTreeProcesser + + +class CriteriaTreeGrader(CriteriaTreeProcesser): + def __init__(self, submission_files: Dict) -> None: + self.logger = logging.getLogger("GraderService") + self.__submission_files = submission_files + + def __balance_nodes(self, nodes: Sequence[ResultNode], factor: float) -> None: + if len(nodes) == 0: + return + + total_weight = sum(node.weight for node in nodes) * factor + + if total_weight == 0: + equal_weight = 100.0 / len(nodes) + for node in nodes: + node.weight = equal_weight + elif total_weight != 100: + scale_factor = 100.0 / total_weight + for node in nodes: + node.weight *= scale_factor + + def __process_holder(self, holder: CategoryNode | SubjectNode) -> ResultNode: + result = ResultNode( + name=holder.name, + node_type=NodeType.CATEGORY + if isinstance(holder, CategoryNode) + else NodeType.SUBJECT, + weight=holder.weight, + ) + if holder.subjects and holder.tests: + if not holder.subjects_weight: + raise ValueError(f"missing 'subjects_weight' for {holder.name}") + subjects_factor = holder.subjects_weight / 100.0 + tests_factor = 1 - subjects_factor + else: + subjects_factor = 1.0 + tests_factor = 1.0 + + if holder.subjects: + subject_results = [ + self.process_subject(inner_subject) for inner_subject in holder.subjects + ] + self.__balance_nodes(subject_results, subjects_factor) + result.children.extend(subject_results) + + if holder.tests: + test_results = [self.process_test(test) for test in holder.tests] + self.__balance_nodes(test_results, tests_factor) + result.children.extend(test_results) + + return result + + @override + def process_subject(self, subject: SubjectNode) -> ResultNode: + return self.__process_holder(subject) + + @override + def process_test(self, test: TestNode) -> TestResultNode: + test_result = TestResultNode( + name=test.name, + node_type=NodeType.TEST, + weight=100.0, + test_name=test.name, + test_function=test.test_function, + test_params=test.parameters, + file_target=test.file_target, + ) + test_result.execute(self.__submission_files) + return test_result + + @override + def process_category(self, category: CategoryNode) -> ResultNode: + return self.__process_holder(category) + + def __find_first_test(self, node: CategoryNode | SubjectNode) -> Optional[TestNode]: + """Find the first test node in the tree.""" + if isinstance(node, TestNode): + return node + + if hasattr(node, "tests") and node.tests: + return node.tests[0] + + if hasattr(node, "subjects") and node.subjects: + for subject in node.subjects: + result = self.__find_first_test(subject) + if result: + return result + + return None + + def grade(self, tree: CriteriaTree) -> ResultTree: + + root = ResultNode(name="root", node_type=NodeType.CATEGORY, weight=100.0) + + base_result = self.process_category(tree.base) + root.children.append(base_result) + + if tree.bonus: + bonus_result = self.process_category(tree.bonus) + root.children.append(bonus_result) + + if tree.penalty: + penalty_result = self.process_category(tree.penalty) + root.children.append(penalty_result) + + result_tree = ResultTree(root) + + # Handle AI executor batch if needed + # Note: For tree-based grading, the template is embedded in test nodes + first_test = self.__find_first_test(tree.base) + if first_test and hasattr(first_test, "test_function"): + test_func = first_test.test_function + if hasattr(test_func, "executor") and test_func.executor: + self.logger.info("Executing AI batch requests") + test_func.executor.stop() + + return result_tree 
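The __balance_nodes helper above keeps each level of the result tree normalized: sibling weights are rescaled so they total 100, and an even split is used when no weights are given (the real method also folds in the subjects/tests split factor). A compact, self-contained sketch of that rescaling idea; rescale_to_total is an illustrative helper, not part of the codebase:

from typing import List


def rescale_to_total(weights: List[float], target: float = 100.0) -> List[float]:
    """Scale sibling weights so they add up to `target` (even split if all are zero)."""
    if not weights:
        return []
    total = sum(weights)
    if total == 0:
        return [target / len(weights)] * len(weights)
    return [w * (target / total) for w in weights]


if __name__ == "__main__":
    print(rescale_to_total([50, 30, 40]))  # -> [41.67, 25.0, 33.33] (approximately)
    print(rescale_to_total([0, 0, 0]))     # -> [33.33, 33.33, 33.33] (approximately)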
diff --git a/autograder/core/grading/__init__.py b/autograder/services/parsers/__init__.py similarity index 100% rename from autograder/core/grading/__init__.py rename to autograder/services/parsers/__init__.py diff --git a/autograder/services/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py new file mode 100644 index 0000000..dcf590c --- /dev/null +++ b/autograder/services/parsers/criteria_tree.py @@ -0,0 +1,93 @@ +from typing import List, Optional + +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.config.category import CategoryConfig +from autograder.models.config.criteria import CriteriaConfig +from autograder.models.config.subject import SubjectConfig +from autograder.models.config.test import TestConfig +from autograder.models.criteria_tree import ( + CriteriaTree, + SubjectNode, + TestNode, + CategoryNode, +) + + +class CriteriaTreeParser: + def __init__(self, template: Template) -> None: + self.__template: Template = template + + def __parse_subjects(self, configs: List[SubjectConfig]) -> List[SubjectNode]: + subjects = [self.__parse_subject(config) for config in configs] + self.__balance_subject_weights(subjects) + return subjects + + def __parse_subject(self, config: SubjectConfig) -> SubjectNode: + subject = SubjectNode(config.subject_name, config.weight) + + subject.subjects_weight = config.subjects_weight + + if config.subjects: + subject.subjects = self.__parse_subjects(config.subjects) + + if config.tests: + subject.tests = self.__parse_tests(config.tests) + + return subject + + def __balance_subject_weights(self, subjects: List[SubjectNode]) -> None: + total_weight = sum(s.weight for s in subjects) + if total_weight > 0 and total_weight != 100: + scaling_factor = 100 / total_weight + for subject in subjects: + subject.weight = subject.weight * scaling_factor + + def __parse_tests(self, test_configs: List[TestConfig]) -> List[TestNode]: + return [self.__parse_test(test_item) for test_item in test_configs] + + def __find_test_function(self, name: str) -> Optional[TestFunction]: + try: + return self.__template.get_test(name) + except (AttributeError, KeyError): + return None + + def __parse_test(self, config: TestConfig) -> TestNode: + test_function = self.__find_test_function(config.name) + if not test_function: + raise ValueError(f"Couldn't find test {config.name}") + + test = TestNode( + config.name, + config.name, + test_function, + config.get_args_list() or list(), + config.file, + ) + + return test + + def __parse_category(self, category_name, config: CategoryConfig) -> CategoryNode: + category = CategoryNode(category_name, config.weight) + + category.subjects_weight = config.subjects_weight + + if config.subjects: + category.add_subjects(self.__parse_subjects(config.subjects)) + + if config.tests: + category.tests = self.__parse_tests(config.tests) + + return category + + def parse_tree(self, config: CriteriaConfig) -> CriteriaTree: + base_category = self.__parse_category("base", config.base) + tree = CriteriaTree(base_category) + + if config.bonus: + tree.bonus = self.__parse_category("bonus", config.bonus) + + if config.penalty: + tree.penalty = self.__parse_category("penalty", config.penalty) + + return tree diff --git a/autograder/services/pre_flight_service.py b/autograder/services/pre_flight_service.py new file mode 100644 index 0000000..1e89219 --- /dev/null +++ b/autograder/services/pre_flight_service.py @@ -0,0 +1,80 @@ +import logging 
+from typing import List +from autograder.models.dataclass.preflight_error import PreflightError, PreflightCheckType + + +class PreFlightService: + def __init__(self, setup_config): + self.required_files = setup_config.get('required_files', []) + self.setup_commands = setup_config.get('setup_commands', []) + self.fatal_errors: List[PreflightError] = [] + self.logger = logging.getLogger("PreFlight") + + def check_required_files(self, submission_files) -> bool: + """ + Checks for the existence of required files in the submission. + Returns True if all required files exist, False otherwise. + """ + self.logger.debug("Checking required files") + + if not self.required_files: + self.logger.debug("No required files to check") + return True + + for file in self.required_files: + if file not in submission_files: + error_msg = f"**Erro:** Arquivo ou diretório obrigatório não encontrado: `'{file}'`" + self.logger.error(error_msg) + self.fatal_errors.append(PreflightError( + type=PreflightCheckType.FILE_CHECK, + message=error_msg, + details={"missing_file": file} + )) + + # Return True only if no file check errors were added + file_check_errors = [e for e in self.fatal_errors if e.type == PreflightCheckType.FILE_CHECK] + return len(file_check_errors) == 0 + + def check_setup_commands(self) -> bool: + """ + Executes setup commands in a sandbox environment. + Returns True if all commands succeed, False otherwise. + + TODO: Implement sandbox container creation and command execution. + Note: Should validate that sandbox is required if setup_commands are present. + """ + self.logger.debug("Checking setup commands") + + if not self.setup_commands: + self.logger.debug("No setup commands to execute") + return True + + # TODO: Implement actual setup command execution + # This should: + # 1. Create sandbox container + # 2. Execute each command + # 3. Check exit codes + # 4. Append PreflightError if any command fails + + return True + + def has_errors(self) -> bool: + """Check if any fatal errors were found during preflight checks.""" + return len(self.fatal_errors) > 0 + + def get_error_messages(self) -> List[str]: + """Get all error messages as a list of strings.""" + return [error.message for error in self.fatal_errors] + + +""" +Setup commands here is a problem. +The pre-flight service should be responsible for also creating the sandbox container +and executing the setup commmands, so that if one of them fails, the pipeline already stops +However, it's important to check if there's really a need for creating a sandbox. +Maybe add a config validation step before the pipeline starts? +Example: If someone sets setup commands but the template does not require a sandbox, +it should raise a configuration error before starting the pipeline. +""" + + diff --git a/autograder/core/models/__init__.py b/autograder/services/report/__init__.py similarity index 100% rename from autograder/core/models/__init__.py rename to autograder/services/report/__init__.py diff --git a/autograder/services/report/ai_reporter.py b/autograder/services/report/ai_reporter.py new file mode 100644 index 0000000..1d20251 --- /dev/null +++ b/autograder/services/report/ai_reporter.py @@ -0,0 +1,5 @@ + +class AiReporter: + def generate_report(self, result): + # Placeholder for AI report generation logic + return "AI-generated report based on analysis data." 
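The PreFlightService above collects one PreflightError per missing required file before any grading runs. A simplified, self-contained sketch of that check; the function name, the plain English message, and the sample file names are illustrative only (the service itself emits a Markdown-formatted Portuguese message and wraps it in PreflightError):

from typing import Dict, List


def check_required_files(required: List[str], submission_files: Dict[str, str]) -> List[str]:
    """Return one error message per required file missing from the submission."""
    return [
        f"Required file or directory not found: '{name}'"
        for name in required
        if name not in submission_files
    ]


if __name__ == "__main__":
    errors = check_required_files(
        ["index.html", "style.css"],              # hypothetical required files
        {"index.html": "<html></html>"},          # hypothetical submission dict keyed by path
    )
    print(errors)  # -> ["Required file or directory not found: 'style.css'"]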
\ No newline at end of file
diff --git a/autograder/services/report/default_reporter.py b/autograder/services/report/default_reporter.py
new file mode 100644
index 0000000..860892e
--- /dev/null
+++ b/autograder/services/report/default_reporter.py
@@ -0,0 +1,5 @@
+
+
+class DefaultReporter:
+    def generate_report(self, results):
+        pass
\ No newline at end of file
diff --git a/autograder/services/report/reporter_service.py b/autograder/services/report/reporter_service.py
new file mode 100644
index 0000000..3493267
--- /dev/null
+++ b/autograder/services/report/reporter_service.py
@@ -0,0 +1,10 @@
+from autograder.services.report.default_reporter import DefaultReporter
+from autograder.services.report.ai_reporter import AiReporter
+
+class ReporterService:
+    def __init__(self, feedback_mode: str):
+        # Select the reporter that matches the requested feedback mode
+        if feedback_mode == "ai":
+            self._reporter = AiReporter()
+        else:
+            self._reporter = DefaultReporter()
+
diff --git a/autograder/services/template_library_service.py b/autograder/services/template_library_service.py
new file mode 100644
index 0000000..756da05
--- /dev/null
+++ b/autograder/services/template_library_service.py
@@ -0,0 +1,21 @@
+from autograder.models.abstract.template import Template
+from autograder.template_library.web_dev import WebDevTemplate
+#from autograder.template_library.api_testing import ApiTestingTemplate
+#from autograder.template_library.input_output import InputOutputTemplate
+#from autograder.template_library.essay_grader import EssayGraderTemplate
+
+class TemplateLibraryService:
+    def __init__(self):
+        pass
+
+    def start_template(self, template_name: str) -> Template:
+        """Initialize and return the template class based on the template name.
+        If the template requires sandboxing, it creates a sandboxed instance.
+        """
+        return WebDevTemplate()  # Placeholder until the full library lookup is implemented
+
+    def get_template_info(self, template_name: str) -> dict:
+        """Return metadata about the template."""
+        pass
+
+
diff --git a/autograder/core/utils/upstash_driver.py b/autograder/services/upstash_driver.py
similarity index 98%
rename from autograder/core/utils/upstash_driver.py
rename to autograder/services/upstash_driver.py
index c212c73..3c2832a 100644
--- a/autograder/core/utils/upstash_driver.py
+++ b/autograder/services/upstash_driver.py
@@ -4,7 +4,7 @@ from upstash_redis import Redis
 
 load_dotenv()
 
-class Driver:
+class UpstashDriver:
     def __init__(self,redis):
         self.redis = redis
diff --git a/autograder/core/report/__init__.py b/autograder/steps/__init__.py
similarity index 100%
rename from autograder/core/report/__init__.py
rename to autograder/steps/__init__.py
diff --git a/autograder/steps/build_tree_step.py b/autograder/steps/build_tree_step.py
new file mode 100644
index 0000000..3ca7bbc
--- /dev/null
+++ b/autograder/steps/build_tree_step.py
@@ -0,0 +1,60 @@
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.services.criteria_tree_service import CriteriaTreeService
+from autograder.models.abstract.step import Step
+from autograder.models.config.criteria import CriteriaConfig
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+
+
+class BuildTreeStep(Step):
+    """
+    Step that builds a CriteriaTree from validated criteria configuration.
+
+    This step is used when grading multiple submissions with the same criteria.
+    The tree is built once and reused for efficiency.
+    """
+
+    def __init__(self, criteria_json: dict):
+        """
+        Initialize the build tree step.
+
+        Args:
+            criteria_json: Raw criteria configuration dictionary
+        """
+        self._criteria_json = criteria_json
+        self._criteria_tree_service = CriteriaTreeService()
+
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        """
+        Build a criteria tree from the configuration and template.
+
+        Args:
+            input: PipelineExecution whose LOAD_TEMPLATE result holds the template with test functions
+
+        Returns:
+            PipelineExecution with a step result containing the built CriteriaTree
+        """
+        try:
+            # Validate criteria configuration
+            criteria_config = CriteriaConfig.from_dict(self._criteria_json)
+            template = input.get_step_result(StepName.LOAD_TEMPLATE).data
+            # Build the criteria tree with embedded test functions
+            criteria_tree = self._criteria_tree_service.build_tree(
+                criteria_config,
+                template
+            )
+
+            return input.add_step_result(StepResult(
+                step=StepName.BUILD_TREE,
+                data=criteria_tree,
+                status=StepStatus.SUCCESS,
+                original_input=input
+            ))
+
+        except Exception as e:
+            return input.add_step_result(StepResult(
+                step=StepName.BUILD_TREE,
+                data=None,
+                status=StepStatus.FAIL,
+                error=f"Failed to build criteria tree: {str(e)}",
+                original_input=input
+            ))
diff --git a/autograder/steps/export_step.py b/autograder/steps/export_step.py
new file mode 100644
index 0000000..75e00aa
--- /dev/null
+++ b/autograder/steps/export_step.py
@@ -0,0 +1,31 @@
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+
+
+class ExporterStep(Step):
+    def __init__(self, remote_driver):
+        self._remote_driver = remote_driver  # UpstashDriver
+
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        try:
+            # Extract username and score from input
+            username = input.submission.username
+            score = input.get_step_result(StepName.GRADE).data.final_score
+
+            # Set the score using UpstashDriver
+            self._remote_driver.set_score(username, score)
+
+            # Return success result
+            return input.add_step_result(StepResult(
+                step=StepName.EXPORTER,
+                data={"username": username, "score": score},
+                status=StepStatus.SUCCESS
+            ))
+        except Exception as e:
+            # Return failure result
+            return input.add_step_result(StepResult(
+                step=StepName.EXPORTER,
+                data=None,
+                status=StepStatus.FAIL,
+                error=str(e),
+            ))
diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
new file mode 100644
index 0000000..0ec27a4
--- /dev/null
+++ b/autograder/steps/feedback_step.py
@@ -0,0 +1,22 @@
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+from autograder.services.report.reporter_service import ReporterService
+
+
+class FeedbackStep(Step):
+    def __init__(self,
+                 reporter_service: ReporterService,
+                 feedback_config: dict):
+        self._reporter_service = reporter_service
+        self._feedback_config = feedback_config
+
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        """Adds feedback to the grading result using the reporter service."""
+        try:
+            grading_result = input.get_step_result(StepName.GRADE).data
+            feedback = self._reporter_service.generate_feedback(
+                grading_result=grading_result,
+                feedback_config=self._feedback_config
+            )  # TODO: Implement generate_feedback method @joaovitoralvarenga
+            grading_result.feedback = feedback  # GradingResult exposes a feedback field (see pipeline.py); assumes it is mutable
+            return input.add_step_result(StepResult(
+                step=StepName.FEEDBACK,  # assumes a FEEDBACK member exists on StepName
+                data=grading_result,
+                status=StepStatus.SUCCESS
+            ))
+        except Exception as e:
+            return input.add_step_result(StepResult(
+                step=StepName.FEEDBACK,
+                data=None,
+                status=StepStatus.FAIL,
+                error=str(e),
+            ))
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
new file mode 100644
index 0000000..e152c91
---
/dev/null
+++ b/autograder/steps/grade_step.py
@@ -0,0 +1,68 @@
+
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+from autograder.models.abstract.step import Step
+from autograder.services.grader_service import GraderService
+
+
+class GradeStep(Step):
+    """
+    Step that grades a submission using the CriteriaTree built earlier in the pipeline.
+
+    It reads the tree from the BUILD_TREE step result, runs every embedded test
+    against the submission files, and wraps the outcome in a GradingResult.
+    """
+
+    def __init__(self):
+        """Initialize the grade step."""
+        self._grader_service = GraderService()
+
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        """
+        Grade the submission carried by the pipeline execution.
+
+        Args:
+            input: PipelineExecution containing the built CriteriaTree and the submission files
+
+        Returns:
+            PipelineExecution with a step result containing the GradingResult (score and result tree)
+        """
+        try:
+            criteria_tree = input.get_step_result(StepName.BUILD_TREE).data
+            result_tree = self._grader_service.grade_from_tree(
+                criteria_tree=criteria_tree,
+                submission_files=input.submission.submission_files
+            )
+
+            # Create grading result
+            final_score = result_tree.calculate_final_score()
+
+            grading_result = GradingResult(
+                final_score=final_score, status="success", result_tree=result_tree
+            )
+
+            return input.add_step_result(StepResult(
+                step=StepName.GRADE,
+                data=grading_result,
+                status=StepStatus.SUCCESS,
+                original_input=input
+            ))
+
+        except Exception as e:
+            # Return error result
+            return input.add_step_result(StepResult(
+                step=StepName.GRADE,
+                data=None,
+                status=StepStatus.FAIL,
+                error=str(e),
+                original_input=input,
+            ))
diff --git a/autograder/steps/load_template_step.py b/autograder/steps/load_template_step.py
new file mode 100644
index 0000000..9d2d133
--- /dev/null
+++ b/autograder/steps/load_template_step.py
@@ -0,0 +1,18 @@
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+from autograder.services.template_library_service import TemplateLibraryService
+from autograder.models.abstract.step import Step
+
+
+class TemplateLoaderStep(Step):
+    def __init__(self, template_name: str, custom_template = None):
+        self._template_name = template_name
+        self._custom_template = custom_template
+        self._template_service = TemplateLibraryService()
+
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        try:
+            if self._custom_template:
+                template = self._template_service.load_custom_template(self._custom_template)  # TODO: Implement Custom Template Loading with Sandboxed Env
+            else:
+                template = self._template_service.start_template(self._template_name)  # Load built-in template; custom templates should follow the same path to avoid duplication
+            # Wrap the loaded template in a step result so later steps can look it up by name
+            return input.add_step_result(StepResult(
+                step=StepName.LOAD_TEMPLATE,
+                data=template,
+                status=StepStatus.SUCCESS
+            ))
+        except Exception as e:
+            return input.add_step_result(StepResult(
+                step=StepName.LOAD_TEMPLATE,
+                data=None,
+                status=StepStatus.FAIL,
+                error=str(e),
+            ))
+
+
diff --git a/autograder/steps/pre_flight_step.py b/autograder/steps/pre_flight_step.py
new file mode 100644
index 0000000..dd0e450
--- /dev/null
+++ b/autograder/steps/pre_flight_step.py
@@ -0,0 +1,74 @@
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+from
autograder.services.pre_flight_service import PreFlightService + + +class PreFlightStep(Step): + """ + Pre-flight check step that validates submission before grading begins. + + Checks are run in order: + 1. Required files check + 2. Setup commands check (only if files check passes) + + If any check fails, the step returns a FAIL status with error details. + """ + + def __init__(self, setup_config): + self._setup_config = setup_config + self._pre_flight_service = PreFlightService(setup_config) + + def execute(self, input: PipelineExecution) -> PipelineExecution: + """ + Execute pre-flight checks on the submission. + + Args: + input: PipelineExecution containing submission data + + Returns: + StepResult with status SUCCESS if all checks pass, FAIL otherwise + """ + # Check required files first + submission_files = input.submission.submission_files + if self._setup_config.get('required_files'): + files_ok = self._pre_flight_service.check_required_files(submission_files) + if not files_ok: + # File check failed, don't continue to setup commands + return input.add_step_result(StepResult( + step=StepName.PRE_FLIGHT, + data=input, + status=StepStatus.FAIL, + error=self._format_errors(), + original_input=input + )) + + + # Check setup commands only if file check passed + if self._setup_config.get('setup_commands'): + setup_ok = self._pre_flight_service.check_setup_commands() + if not setup_ok: + return input.add_step_result(StepResult( + step=StepName.PRE_FLIGHT, + data=input, + status=StepStatus.FAIL, + error=self._format_errors(), + original_input=input + )) + + # All checks passed + return input.add_step_result(StepResult( + step=StepName.PRE_FLIGHT, + data=input, + status=StepStatus.SUCCESS, + original_input=input + )) + + def _format_errors(self) -> str: + """Format all preflight errors into a single error message.""" + if not self._pre_flight_service.has_errors(): + return "Unknown preflight error" + + error_messages = self._pre_flight_service.get_error_messages() + return "\n".join(error_messages) + diff --git a/autograder/core/utils/__init__.py b/autograder/template_library/__init__.py similarity index 100% rename from autograder/core/utils/__init__.py rename to autograder/template_library/__init__.py diff --git a/autograder/builder/template_library/templates/api_testing.py b/autograder/template_library/api_testing.py similarity index 66% rename from autograder/builder/template_library/templates/api_testing.py rename to autograder/template_library/api_testing.py index 3d6d988..3bf43f9 100644 --- a/autograder/builder/template_library/templates/api_testing.py +++ b/autograder/template_library/api_testing.py @@ -2,11 +2,11 @@ import requests import json import logging -from autograder.builder.models.template import Template -from autograder.builder.models.test_function import TestFunction -from autograder.builder.models.param_description import ParamDescription -from autograder.core.models.test_result import TestResult -from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.dataclass.param_description import ParamDescription +from autograder.models.dataclass.test_result import TestResult +from autograder.utils.executors.sandbox_executor import SandboxExecutor # Configure basic logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -231,110 +231,3 @@ def get_test(self, 
name: str) -> TestFunction: return test_function -if __name__ == "__main__": - import sys - import os - - # This allows the script to find the other autograder modules - project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..')) - if project_root not in sys.path: - sys.path.insert(0, project_root) - - from connectors.models.autograder_request import AutograderRequest - from connectors.models.assignment_config import AssignmentConfig - from autograder.context import request_context - - - def create_mock_submission(): - """Creates the in-memory files for a simple student Express.js API.""" - package_json = { - "name": "student-api", "version": "1.0.0", "main": "server.js", - "scripts": {"start": "node server.js"}, - "dependencies": {"express": "^4.17.1"} - } - server_js = """ - const express = require('express'); - const app = express(); - const port = 8000; - - app.get('/health', (req, res) => res.status(200).send({ status: 'ok' })); - app.get('/api/user', (req, res) => res.json({ userId: 1, name: 'John Doe' })); - - // The second argument '0.0.0.0' is the key. - app.listen(port, '0.0.0.0', () => { - console.log(`Server listening on port ${port}`); - }); - """ - return { - "package.json": json.dumps(package_json, indent=2), - "server.js": server_js - } - - - def create_mock_configs(): - """Creates the mock setup and criteria configurations.""" - setup_config = { - "runtime_image": "node:18-alpine", - "container_port": 8000, - "start_command": "node server.js", - "commands": {"install_dependencies": "npm install"} - } - criteria_config = { - "base": { - "subjects": { - "api_functionality": { - "weight": 100, - "tests": [ - {"name": "health_check", "calls": [["/health"]]}, - {"name": "check_response_json", "calls": [["/api/user", "userId", 1]]} - ] - } - } - } - } - return setup_config, criteria_config - - - # --- Main Simulation Logic --- - logging.info("--- 1. Setting up mock environment ---") - submission_files = create_mock_submission() - setup_config, criteria_config = create_mock_configs() - - assignment_config = AssignmentConfig(criteria=criteria_config, feedback=None, setup=setup_config) - autograder_request = AutograderRequest( - submission_files=submission_files, - assignment_config=assignment_config, - student_name="MockStudent" - ) - request_context.set_request(autograder_request) - - template = None - try: - logging.info("\n--- 2. Initializing API Testing Template (this will start the sandbox) ---") - template = ApiTestingTemplate() - - logging.info("\n--- 3. Running Tests ---") - - health_check_test = template.get_test("health_check") - health_result = health_check_test.execute("/health") - - logging.info("\n[Health Check Result]") - logging.info(f" Score: {health_result.score}") - logging.info(f" Report: {health_result.report}") - - json_check_test = template.get_test("check_response_json") - json_result = json_check_test.execute("/api/user", "userId", 1) - - logging.info("\n[JSON Check Result]") - logging.info(f" Score: {json_result.score}") - logging.info(f" Report: {json_result.report}") - - except Exception as e: - logging.error(f"\nAN ERROR OCCURRED: {e}") - import traceback - traceback.print_exc() - - finally: - if template: - logging.info("\n--- 4. 
Cleaning up sandbox environment ---") - template.stop() diff --git a/autograder/builder/template_library/templates/essay_grader.py b/autograder/template_library/essay_grader.py similarity index 97% rename from autograder/builder/template_library/templates/essay_grader.py rename to autograder/template_library/essay_grader.py index 0b6328d..5d0324b 100644 --- a/autograder/builder/template_library/templates/essay_grader.py +++ b/autograder/template_library/essay_grader.py @@ -1,8 +1,8 @@ -from autograder.builder.execution_helpers.AI_Executor import AiExecutor, ai_executor -from autograder.builder.models.template import Template -from autograder.builder.models.test_function import TestFunction -from autograder.builder.models.param_description import ParamDescription -from autograder.core.models.test_result import TestResult +from autograder.utils.executors.ai_executor import ai_executor +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.dataclass.param_description import ParamDescription +from autograder.models.dataclass.test_result import TestResult # =============================================================== # region: TestFunction Implementations diff --git a/autograder/builder/template_library/templates/input_output.py b/autograder/template_library/input_output.py similarity index 57% rename from autograder/builder/template_library/templates/input_output.py rename to autograder/template_library/input_output.py index bc7cac4..1373fda 100644 --- a/autograder/builder/template_library/templates/input_output.py +++ b/autograder/template_library/input_output.py @@ -1,11 +1,8 @@ -import time -import json - -from autograder.builder.models.template import Template -from autograder.builder.models.test_function import TestFunction -from autograder.builder.models.param_description import ParamDescription -from autograder.core.models.test_result import TestResult -from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.dataclass.param_description import ParamDescription +from autograder.models.dataclass.test_result import TestResult +from autograder.utils.executors.sandbox_executor import SandboxExecutor # =============================================================== @@ -159,115 +156,3 @@ def get_test(self, name: str) -> TestFunction: return test_function -if __name__ == "__main__": - import sys - import os - - project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..')) - if project_root not in sys.path: - sys.path.insert(0, project_root) - - from connectors.models.autograder_request import AutograderRequest - from connectors.models.assignment_config import AssignmentConfig - from autograder.context import request_context - - - def create_mock_submission(): - """Creates an in-memory file for a simple Python calculator.""" - calculator_py = """ -import sys - -def main(): - try: - # Using sys.stdin.readline() is more robust for non-interactive scripts - operation = sys.stdin.readline().strip() - num1 = float(sys.stdin.readline().strip()) - num2 = float(sys.stdin.readline().strip()) - - if operation == "sum": - print(num1 + num2) - elif operation == "subtract": - print(num1 - num2) - else: - print("Unknown operation") - except (ValueError, IndexError): - print("Invalid input") - -if __name__ == 
"__main__": - main() -""" - return {"calculator.py": calculator_py} - - - def create_mock_configs(): - """Creates the mock setup and criteria configurations.""" - setup_config = { - "runtime_image": "python:3.11-slim", - "start_command": "python calculator.py" - } - criteria_config = { - "base": { - "subjects": { - "calculation_tests": { - "weight": 100, - "tests": [ - {"name": "expect_output", "calls": [[["sum", 2, 2], "4.0"]]}, - {"name": "expect_output", "calls": [[["subtract", 10, 5], "5.0"]]} - ] - } - } - } - } - return setup_config, criteria_config - - - # --- Main Simulation Logic --- - print("--- 1. Setting up mock environment ---") - submission_files = create_mock_submission() - setup_config, criteria_config = create_mock_configs() - - assignment_config = AssignmentConfig(criteria=criteria_config, feedback=None, setup=setup_config) - autograder_request = AutograderRequest( - submission_files=submission_files, - assignment_config=assignment_config, - student_name="MockStudent" - ) - request_context.set_request(autograder_request) - - template = None - try: - print("\n--- 2. Initializing Input/Output Template ---") - template = InputOutputTemplate() - - print("\n--- 3. Running Tests ---") - - # Test 1: Sum (Will pass) - test_func = template.get_test("expect_output") - sum_result = test_func.execute(["sum", 2, 2], "4.0") - print("\n[Sum Test Result]") - print(f" Score: {sum_result.score}") - print(f" Report: {sum_result.report}") - - # Test 2: Sum (Will fail) - test_func = template.get_test("expect_output") - sum_result = test_func.execute(["sum", 2, 2], "3.0") - print("\n[Sum Test Result]") - print(f" Score: {sum_result.score}") - print(f" Report: {sum_result.report}") - - # Test 2: Subtract - subtract_result = test_func.execute(["subtract", 10, 5], "5.0") - print("\n[Subtract Test Result]") - print(f" Score: {subtract_result.score}") - print(f" Report: {subtract_result.report}") - - except Exception as e: - print(f"\nAN ERROR OCCURRED: {e}") - import traceback - - traceback.print_exc() - - finally: - if template: - print("\n--- 4. 
Cleaning up sandbox environment ---") - template.stop() diff --git a/autograder/builder/template_library/templates/web_dev.py b/autograder/template_library/web_dev.py similarity index 99% rename from autograder/builder/template_library/templates/web_dev.py rename to autograder/template_library/web_dev.py index 89ae144..8d88e06 100644 --- a/autograder/builder/template_library/templates/web_dev.py +++ b/autograder/template_library/web_dev.py @@ -3,11 +3,11 @@ from bs4 import BeautifulSoup -from autograder.builder.models.template import Template -from autograder.builder.models.test_function import TestFunction -from autograder.builder.models.param_description import ParamDescription +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.dataclass.param_description import ParamDescription -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult # =============================================================== diff --git a/connectors/__init__.py b/autograder/utils/__init__.py similarity index 100% rename from connectors/__init__.py rename to autograder/utils/__init__.py diff --git a/connectors/adapters/__init__.py b/autograder/utils/executors/__init__.py similarity index 100% rename from connectors/adapters/__init__.py rename to autograder/utils/executors/__init__.py diff --git a/autograder/builder/execution_helpers/AI_Executor.py b/autograder/utils/executors/ai_executor.py similarity index 64% rename from autograder/builder/execution_helpers/AI_Executor.py rename to autograder/utils/executors/ai_executor.py index 0fa6178..fe6627a 100644 --- a/autograder/builder/execution_helpers/AI_Executor.py +++ b/autograder/utils/executors/ai_executor.py @@ -1,11 +1,11 @@ import json from typing import List from openai import OpenAI -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult from pydantic import BaseModel, Field from autograder.context import request_context import dotenv -from autograder.core.utils.secrets_fetcher import get_secret +from autograder.utils.secrets_fetcher import get_secret dotenv.load_dotenv() # Load environment variables from .env file @@ -182,15 +182,3 @@ def stop(self): -if __name__ == "__main__": - text = {"text.txt":"""Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike. -The primary benefit of AI in the workplace is its capacity to augment human potential and drive efficiency. AI-powered systems can analyze vast datasets in seconds, automating routine cognitive and manual tasks, which frees human workers to focus on more complex, creative, and strategic endeavors. For instance, in medicine, AI algorithms assist radiologists in detecting tumors with greater accuracy, while in finance, they identify fraudulent transactions far more effectively than any human team. 
This collaboration between human and machine not only boosts output but also creates new roles centered around AI development, ethics, and system maintenance—jobs that did not exist a decade ago. -However, this technological advancement casts a significant shadow of disruption. The same automation that drives efficiency also leads to job displacement, particularly for roles characterized by repetitive tasks. Assembly line workers, data entry clerks, and even some paralegal roles face a high risk of obsolescence. This creates a widening skills gap, where demand for high-level technical skills soars while demand for traditional skills plummets. Without robust mechanisms for reskilling and upskilling the existing workforce, this gap threatens to exacerbate socio-economic inequality, creating a divide between those who can command AI and those who are displaced by it. There are many gramatical errors in this sentence, for testing purposes. -The most critical challenge, therefore, is not to halt technological progress but to manage its societal impact. A multi-pronged strategy is essential. Governments and educational institutions must collaborate to reform curricula, emphasizing critical thinking, digital literacy, and lifelong learning. Furthermore, corporations have a responsibility to invest in their employees through continuous training programs. Finally, strengthening social safety nets, perhaps through concepts like Universal Basic Income (UBI) or enhanced unemployment benefits, may be necessary to support individuals as they navigate this volatile transition period. -In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""} - - ai_executor.add_test("Content: Identify Specific Examples","In a scale of 0 to 100, how well does the text provide specific examples to support its main points? Consider the relevance and clarity of the examples given.") - ai_executor.add_test("Clarity: Evaluate Overall Clarity","On a scale from 0 to 100, how clear and understandable is the text? Consider the organization of ideas, sentence structure, and use of language.") - ai_executor.add_test("Grammar: Check for Grammatical Errors","On a scale from 0 to 100, how free is the text from grammatical errors? Consider issues such as subject-verb agreement, punctuation, and sentence fragments.") - ai_executor.add_test("Engagement: Assess Reader Engagement","On a scale from 0 to 100, how engaging is the text? 
Consider the use of anecdotes, rhetorical questions, and vivid language that captures the reader's interest.") - results = ai_executor.stop() diff --git a/autograder/builder/execution_helpers/sandbox_executor.py b/autograder/utils/executors/sandbox_executor.py similarity index 100% rename from autograder/builder/execution_helpers/sandbox_executor.py rename to autograder/utils/executors/sandbox_executor.py diff --git a/connectors/adapters/api/__init__.py b/autograder/utils/formatters/__init__.py similarity index 100% rename from connectors/adapters/api/__init__.py rename to autograder/utils/formatters/__init__.py diff --git a/autograder/utils/formatters/criteria_tree.py b/autograder/utils/formatters/criteria_tree.py new file mode 100644 index 0000000..7d9d67c --- /dev/null +++ b/autograder/utils/formatters/criteria_tree.py @@ -0,0 +1,26 @@ +from typing import List, override +from autograder.utils.processers.criteria_tree import CriteriaTreeProcesser +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from autograder.models.criteria_tree import CategoryNode, SubjectNode, TestNode + + +class CriteriaTreeFormatter(CriteriaTreeProcesser): + def header(self) -> str: + return "🌲 Criteria Tree" + + @override + def process_test(self, test: "TestNode") -> List[str]: + result: List[str] = list() + result.append(f" 🧪 {test.name} (file: {test.file_target})") + result.append(f" - Parameters: {test.parameters}") + return result + + @override + def process_subject(self, subject: "SubjectNode") -> str: + return f"📘 {subject.name} (weight: {subject.weight})" + + @override + def process_category(self, category: "CategoryNode") -> str: + return f" 📁 {category.name.upper()} (max_score: {category.weight})" diff --git a/connectors/adapters/github_action_adapter/__init__.py b/autograder/utils/printers/__init__.py similarity index 100% rename from connectors/adapters/github_action_adapter/__init__.py rename to autograder/utils/printers/__init__.py diff --git a/autograder/utils/printers/criteria_tree.py b/autograder/utils/printers/criteria_tree.py new file mode 100644 index 0000000..686dc3d --- /dev/null +++ b/autograder/utils/printers/criteria_tree.py @@ -0,0 +1,47 @@ +from typing import TYPE_CHECKING +from autograder.utils.formatters.criteria_tree import CriteriaTreeFormatter + +if TYPE_CHECKING: + from autograder.models.criteria_tree import CriteriaTree, CategoryNode, SubjectNode + + +class CriteriaTreePrinter: + def __init__(self, formatter: CriteriaTreeFormatter | None = None) -> None: + self.__depth = 0 + self.__formatter = CriteriaTreeFormatter() if formatter is None else formatter + + def __increase_depth(self) -> None: + self.__depth += 1 + + def __decrease_depth(self) -> None: + self.__depth -= 1 + + def __print_with_depth(self, formatted: str) -> None: + print(f"{' ' * self.__depth}{formatted}") + + def __print_children(self, parent: "CategoryNode | SubjectNode") -> None: + for subject in parent.subjects: + self.print_subject(subject) + + for test in parent.tests: + lines = self.__formatter.process_test(test) + for line in lines: + self.__print_with_depth(line) + + def print_subject(self, subject: "SubjectNode") -> None: + self.__increase_depth() + self.__print_with_depth(self.__formatter.process_subject(subject)) + self.__print_children(subject) + self.__decrease_depth() + + def print_category(self, category: "CategoryNode") -> None: + self.__print_with_depth(self.__formatter.process_category(category)) + self.__print_children(category) + + def print_tree(self, tree: "CriteriaTree") -> None: + 
self.__print_with_depth(self.__formatter.header()) + self.print_category(tree.base) + if tree.bonus: + self.print_category(tree.bonus) + if tree.penalty: + self.print_category(tree.penalty) diff --git a/connectors/models/__init__.py b/autograder/utils/processers/__init__.py similarity index 100% rename from connectors/models/__init__.py rename to autograder/utils/processers/__init__.py diff --git a/autograder/utils/processers/criteria_tree.py b/autograder/utils/processers/criteria_tree.py new file mode 100644 index 0000000..8ee966f --- /dev/null +++ b/autograder/utils/processers/criteria_tree.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod +from typing import Any, TYPE_CHECKING + +if TYPE_CHECKING: + from autograder.models.criteria_tree import CategoryNode, SubjectNode, TestNode + + +class CriteriaTreeProcesser(ABC): + @abstractmethod + def process_subject(self, subject: "SubjectNode") -> Any: + pass + + @abstractmethod + def process_test(self, test: "TestNode") -> Any: + pass + + @abstractmethod + def process_category(self, category: "CategoryNode") -> Any: + pass diff --git a/autograder/core/utils/secrets_fetcher.py b/autograder/utils/secrets_fetcher.py similarity index 100% rename from autograder/core/utils/secrets_fetcher.py rename to autograder/utils/secrets_fetcher.py diff --git a/connectors/adapters/api/api_adapter.py b/connectors/adapters/api/api_adapter.py deleted file mode 100644 index 70d4934..0000000 --- a/connectors/adapters/api/api_adapter.py +++ /dev/null @@ -1,124 +0,0 @@ -import inspect -import textwrap -from typing import List, Optional, Dict, Any -from fastapi import UploadFile - -from connectors.models.autograder_request import AutograderRequest -from connectors.models.assignment_config import AssignmentConfig -import json -from connectors.port import Port -import logging -from autograder.builder.template_library.library import TemplateLibrary -from autograder.context import request_context - -class ApiAdapter(Port): - - def export_results(self): - """ - Prepares the results of the autograding workfow as an API response. - Also retrieves important data from the request (student_credentiaals) - """ - if not self.autograder_response: - raise Exception("No autograder response available. 
Please run the autograder first.") - - # Prepare the API response - test_report = self.autograder_response.test_report - response = { - "server_status": "Server connection happened successfully", - "autograding_status": self.autograder_response.status, - "final_score": self.autograder_response.final_score, - "feedback": self.autograder_response.feedback, - "test_report": [test_result.to_dict() for test_result in test_report] if test_report else [], - } - - return response - - async def create_request(self, - submission_files: List[UploadFile], - assignment_config: AssignmentConfig, - student_name, - student_credentials, - include_feedback=False, - feedback_mode="default", - openai_key=None, - redis_url=None, - redis_token=None): - submission_files_dict = {} - for submission_file in submission_files: - if ".git" in submission_file.filename: - continue - submission_content = await submission_file.read() - submission_files_dict[submission_file.filename] = submission_content.decode("utf-8") - self.autograder_request = AutograderRequest( - submission_files=submission_files_dict, - assignment_config=assignment_config, - student_name=student_name, - student_credentials=student_credentials, - include_feedback=include_feedback, - feedback_mode=feedback_mode, - openai_key=openai_key, - redis_url=redis_url, - redis_token=redis_token, - ) - - - async def load_assignment_config(self, template: str, criteria: UploadFile, feedback: UploadFile, - setup: Optional[UploadFile] = None, custom_template: Optional[UploadFile] = None) -> AssignmentConfig: - """ - Loads the assignment configuration based on the provided template preset. - """ - logger = logging.getLogger(__name__) - try: - # Read and parse template name - template_name = template - logger.info(f"Template name: {template_name}") - - # Loads the raw json strings (template,criteria,feedback and setup) into dictionaries - criteria_content = await criteria.read() - criteria_dict = json.loads(criteria_content.decode("utf-8")) if criteria else None - logger.info(f"Criteria loaded: {criteria_dict is not None}") - - feedback_content = await feedback.read() - feedback_dict = json.loads(feedback_content.decode("utf-8")) if feedback else None - logger.info(f"Feedback config loaded: {feedback_dict is not None}") - - setup_dict = None - if setup: - setup_content = await setup.read() - setup_dict = json.loads(setup_content.decode("utf-8")) if setup else None - logger.info(f"Setup config loaded: {setup_dict is not None}") - custom_template_str = None - if custom_template: - custom_template_content = await custom_template.read() - custom_template_str = custom_template_content.decode("utf-8") - - return AssignmentConfig(criteria=criteria_dict, feedback=feedback_dict, setup=setup_dict, - template=template_name, custom_template_str = custom_template_str) - - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in configuration files: {e}") - raise ValueError(f"Invalid JSON format in configuration files: {e}") - except UnicodeDecodeError as e: - logger.error(f"Encoding error reading configuration files: {e}") - raise ValueError(f"Unable to decode configuration files: {e}") - - def get_template_info(self,template_name: str) -> Dict[str, Any]: - """ - Retrieves a dictionary containing all the information of a Template, - including its name, description, and full details for each test function - (name, description, parameters, and source code). 
- """ - - request_context.set_request(AutograderRequest.build_empty_request()) - print("REQUEST_CONTEXT:", request_context.get_request()) - # 1. Retrieve an instance of the template from the library - return TemplateLibrary.get_template_info(template_name) - - - - -if __name__ == "__main__": - adapter = ApiAdapter() - template_info = adapter.get_template_info("web dev") - print(template_info) - diff --git a/connectors/adapters/github_action_adapter/github_adapter.py b/connectors/adapters/github_action_adapter/github_adapter.py deleted file mode 100644 index 2684544..0000000 --- a/connectors/adapters/github_action_adapter/github_adapter.py +++ /dev/null @@ -1,239 +0,0 @@ -import json -import os -import shutil - -from connectors.models.assignment_config import AssignmentConfig -from connectors.models.autograder_request import AutograderRequest -from connectors.port import Port -from github import Github -from github.GithubException import UnknownObjectException - -class GithubAdapter(Port): - def __init__(self,github_token,app_token): - super().__init__() - self.github_token = github_token - self.app_token = app_token - self.repo = self.get_repository(app_token) - - def get_repository(self,app_token): - try: - repos = os.getenv("GITHUB_REPOSITORY") - g = Github(app_token) - repo = g.get_repo(repos) - print("This repo is: ", repo) - return repo - except: - raise Exception("Failed to get repository. Please check your GitHub token and repository settings.") - - def notify_classroom(self): - final_score = self.autograder_response.final_score - if final_score < 0 or final_score > 100: - print("Invalid final score. It should be between 0 and 100.") - return - - # Retrieve the GitHub token and repository information from environment variables - - repo_name = os.getenv("GITHUB_REPOSITORY") - if not repo_name: - print("Repository information is missing.") - return - - token = os.getenv("GITHUB_TOKEN") - # Create the GitHub client using the token - g = Github(token) - repo = g.get_repo(repo_name) - - # Get the workflow run ID - run_id = os.getenv("GITHUB_RUN_ID") - if not run_id: - print("Run ID is missing.") - return - - # Fetch the workflow run - workflow_run = repo.get_workflow_run(int(run_id)) - - # Find the check suite run ID - check_suite_url = workflow_run.check_suite_url - check_suite_id = int(check_suite_url.split('/')[-1]) - - # Get the check runs for this suite - check_runs = repo.get_check_suite(check_suite_id) - check_run = next((run for run in check_runs.get_check_runs() if run.name == "grading"), None) - if not check_run: - print("Check run not found.") - return - # Create a summary for the final grade - text = f"Final Score: {format(final_score, '.2f')}/100" - - # Update the check run with the final score - check_run.edit( - name="Autograding", - output={ - "title": "Autograding Result", - "summary": text, - "text": json.dumps({"totalPoints": format(final_score, '.2f'), "maxPoints": 100}), - "annotations": [{ - "path": ".github", - "start_line": 1, - "end_line": 1, - "annotation_level": "notice", - "message": text, - "title": "Autograding complete" - }] - } - ) - - print(f"Final grade updated: {format(final_score, '.2f')}/100") - - def commit_feedback(self): - file_path = "relatorio.md" - file_sha = None - commit_message = "" - # If the autograder_request exists and include_feedback is explicitly False, - # skip committing the relatorio.md file. 
- req = getattr(self, 'autograder_request', None) - if req is not None and not getattr(req, 'include_feedback', False): - print("Feedback generation disabled (include_feedback=False), skipping commit of relatorio.md.") - return - - # Safely get feedback content (may be None or autograder_response may not exist) - new_content = None - resp = getattr(self, 'autograder_response', None) - if resp is not None: - new_content = getattr(resp, 'feedback', None) - # 1. Tente obter o arquivo para ver se ele já existe - try: - file = self.repo.get_contents(file_path) - file_sha = file.sha - print(f"Arquivo '{file_path}' encontrado. Preparando para atualizar...") - except UnknownObjectException: - print(f"Arquivo '{file_path}' não encontrado. Preparando para criar...") - pass - - # 2. Fora do try/except, decida se cria ou atualiza - if file_sha: - commit_message = f"Atualizando relatório: {file_path}" - self.repo.update_file(path=file_path, message=commit_message, content=new_content, sha=file_sha) - print("Relatório atualizado com sucesso.") - else: - commit_message = f"Criando relatório: {file_path}" - self.repo.create_file(path=file_path, message=commit_message, content=new_content) - print("Relatório criado com sucesso.") - def export_results(self): - self.commit_feedback() - self.notify_classroom() - - - - def get_submission_files(self): - - base_path = os.getenv("GITHUB_WORKSPACE", ".") - submission_path = os.path.join(base_path, 'submission') - submission_files_dict = {} - - # take all files in the submission directory and add them to the submission_files_dict - for root, dirs, files in os.walk(submission_path): - # Skip .git directory - if '.git' in dirs: - dirs.remove('.git') - if '.github' in dirs: - dirs.remove('.github') - for file in files: - # Full path to the file - file_path = os.path.join(root, file) - - # Key: Path relative to the starting directory to ensure uniqueness - relative_path = os.path.relpath(file_path, submission_path) - - try: - with open(file_path, "r", encoding='utf-8', errors='ignore') as f: - print("Adding file to submission_files_dict: ", relative_path) - # Use the unique relative_path as the key - submission_files_dict[relative_path] = f.read() - except Exception as e: - print(f"Could not read file {file_path}: {e}") - - return submission_files_dict - - def create_request(self, submission_files, assignment_config, student_name, student_credentials, feedback_mode="default", openai_key=None, redis_url=None, redis_token=None, include_feedback = False): - """ - Creates an AutograderRequest object with the provided parameters. - """ - print("Getting submission files from the repository...") - submission_files_dict = self.get_submission_files() - print(submission_files_dict) - print(f"Creating AutograderRequest with {feedback_mode} feedback mode") - self.autograder_request = AutograderRequest( - submission_files=submission_files_dict, - assignment_config=assignment_config, - student_name=student_name, - student_credentials=student_credentials, - include_feedback=include_feedback, - feedback_mode=feedback_mode, - openai_key=openai_key, - redis_url=redis_url, - redis_token=redis_token, - ) - print(f"AutograderRequest created with {self.autograder_request.feedback_mode} feedback mode") - - def create_assigment_config(self,template_preset): - """ - Looks inside $GITHUB_WORKSPACE/submission/.github/autograder for the criteria.json, feedback.json and setup.json files. 
- """ - base_path = os.getenv("GITHUB_WORKSPACE", ".") - submission_path = os.path.join(base_path, 'submission') - configuration_path = os.path.join(submission_path, '.github','autograder') - - criteria_path = os.path.join(configuration_path, 'criteria.json') - if not os.path.exists(criteria_path): - raise FileNotFoundError("criteria.json file not found in the autograder configuration directory.") - feedback_path = os.path.join(configuration_path, 'feedback.json') - if not os.path.exists(feedback_path): - raise FileNotFoundError("feedback.json file not found in the autograder configuration directory.") - setup_path = os.path.join(configuration_path, 'setup.json') - - - criteria_dict = None - feedback_dict = None - setup_dict = None - - with open(criteria_path, 'r', encoding='utf-8') as f: - criteria_dict = json.load(f) - print("Criteria loaded successfully.") - - - - with open(feedback_path, 'r', encoding='utf-8') as f: - feedback_dict = json.load(f) - print("Feedback config loaded successfully.") - - - - with open(setup_path, 'r', encoding='utf-8') as f: - setup_dict = json.load(f) - print("Setup config loaded successfully.") - - custom_template_str = None - if template_preset == "custom": - custom_template_path = os.path.join(configuration_path, 'template.py') - if not os.path.exists(custom_template_path): - raise FileNotFoundError("Custom template file 'template.py' not found in the autograder configuration directory.") - with open(custom_template_path, 'r', encoding='utf-8') as f: - custom_template_str = f.read() - print("Custom template loaded successfully.") - - assignment_config = AssignmentConfig( - template=template_preset, - criteria=criteria_dict, - feedback=feedback_dict, - setup=setup_dict, - custom_template_str=custom_template_str, - ) - return assignment_config - - - @classmethod - def create(cls,test_framework,github_author,feedback_type,github_token,app_token,openai_key=None,redis_url=None,redis_token=None): - response = cls(test_framework,github_author) - response.get_repository(app_token) - return response \ No newline at end of file diff --git a/connectors/adapters/api/api_entrypoint.py b/connectors/api_connector.py similarity index 99% rename from connectors/adapters/api/api_entrypoint.py rename to connectors/api_connector.py index 778db74..8af0bcc 100644 --- a/connectors/adapters/api/api_entrypoint.py +++ b/connectors/api_connector.py @@ -7,7 +7,7 @@ from connectors.models.assignment_config import AssignmentConfig import uvicorn -from connectors.adapters.api.api_adapter import ApiAdapter +from connectors import ApiAdapter # Initialize the FastAPI app app = FastAPI( title="WebTech Autograder API Service", diff --git a/connectors/adapters/github_action_adapter/github_entrypoint.py b/connectors/github_connector.py similarity index 97% rename from connectors/adapters/github_action_adapter/github_entrypoint.py rename to connectors/github_connector.py index 77505d4..de22ca5 100644 --- a/connectors/adapters/github_action_adapter/github_entrypoint.py +++ b/connectors/github_connector.py @@ -1,5 +1,5 @@ from argparse import ArgumentParser -from connectors.adapters.github_action_adapter.github_adapter import GithubAdapter +from connectors import GithubAdapter from connectors.models.assignment_config import AssignmentConfig parser = ArgumentParser(description="GitHub Action Adapter for Autograder") parser.add_argument("--github-token", type=str, required=True, help="GitHub Token") diff --git a/connectors/models/assignment_config.py b/connectors/models/assignment_config.py 
deleted file mode 100644 index c106745..0000000 --- a/connectors/models/assignment_config.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from typing import Dict, Any, Optional -from pydantic import BaseModel, Field - - -class AssignmentConfig(BaseModel): - template: str = "custom" - criteria: Dict[str, Any] - feedback: Dict[str, Any] - setup: Optional[Dict[str, Any]] = None - custom_template: Optional[str] = None - - def __str__(self) -> str: - """ - Returns a string representation of the AssignmentConfig object. - """ - criteria = "[Loaded]" if self.criteria else "[Not Loaded]" - feedback = "[Loaded]" if self.feedback else "[Not Loaded]" - setup = "[Loaded]" if self.setup else "[Not Loaded]" - template_str = "[Loaded]" if self.custom_template else "[Not Loaded]" - - return ( - f"AssignmentConfig(template={self.template}, criteria={criteria}, " - f"feedback={feedback}, setup={setup}, custom_template_str={template_str})" - ) - - -if __name__ == "__main__": - # Example usage - config = AssignmentConfig( - template="custom", - criteria={"test_case_1": "description"}, - feedback={"style": "detailed"}, - setup={"environment": "python3.8"}, - custom_template="def custom_function(): pass" - ) - print(config) \ No newline at end of file diff --git a/connectors/models/autograder_request.py b/connectors/models/autograder_request.py deleted file mode 100644 index fce0cc6..0000000 --- a/connectors/models/autograder_request.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Dict, Any, Optional -from pydantic import BaseModel, Field -from connectors.models.assignment_config import AssignmentConfig - - -class AutograderRequest(BaseModel): - submission_files: Dict[str, Any] - assignment_config: AssignmentConfig - student_name: str - student_credentials: Optional[str] = None - include_feedback: bool = False - feedback_mode: str = "default" - openai_key: Optional[str] = None - redis_url: Optional[str] = None - redis_token: Optional[str] = None - criteria_tree: Optional[Any] = None - reporter: Optional[Any] = None - feedback_report: Optional[Any] = None - - def __str__(self) -> str: - stri = f"{len(self.submission_files)} submission files.\n" - stri += f"Assignment config: {self.assignment_config}\n" - stri += f"Student name: {self.student_name}\n" - stri += f"Feedback mode: {self.feedback_mode}\n" - return stri - - @classmethod - def build_empty_request(cls) -> "AutograderRequest": - return cls( - submission_files={}, - assignment_config=AssignmentConfig( - criteria={}, - feedback={}, - setup={}, - template="" - ), - student_name="", - include_feedback=False - ) \ No newline at end of file diff --git a/connectors/port.py b/connectors/port.py deleted file mode 100644 index 7198bef..0000000 --- a/connectors/port.py +++ /dev/null @@ -1,46 +0,0 @@ -from abc import ABC, abstractmethod - -from connectors.models.assignment_config import AssignmentConfig -from autograder.autograder_facade import Autograder - - -class Port(ABC): - """ - Abstract Port class that defines the accepted interface for the core system communication. - """ - def __init__(self): - self.autograder_request = None - self.autograder_response = None - - def run_autograder(self): - try: - response = Autograder.grade(self.autograder_request) - self.autograder_response = response - return self - except Exception as e: - raise Exception(f"Error running autograder: {e}") from e - - - @abstractmethod - def export_results(self): - """ - Abstract method to export the results of the autograder. 
- This method should be implemented by the concrete Port classes. - """ - pass - - @abstractmethod - def create_request(self, - submission_files, - assignment_config: AssignmentConfig, - student_name, - student_credentials, - feedback_mode="default", - openai_key=None, - redis_url=None, - redis_token=None): - """ - Abstract method to create an AutograderRequest object. - This method should be implemented by the concrete Port classes. - """ - pass diff --git a/critera_schema.json b/critera_schema.json new file mode 100644 index 0000000..31753a3 --- /dev/null +++ b/critera_schema.json @@ -0,0 +1,570 @@ +{ + "test_library": "web_dev", + "base": { + "weight": 100, + "subjects": [ + { + "subject_name": "html", + "weight": 60, + "subjects": [ + { + "subject_name": "structure", + "weight": 40, + "tests": [ + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "body" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "header" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "nav" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "main" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "article" + }, + { + "name": "required_count", + "value": 4 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "img" + }, + { + "name": "required_count", + "value": 5 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "footer" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "div" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "form" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "input" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + { + "name": "tag", + "value": "button" + }, + { + "name": "required_count", + "value": 1 + } + ] + }, + { + "file": "index.html", + "name": "has_attribute", + "parameters": [ + { + "name": "attribute", + "value": "class" + }, + { + "name": "required_count", + "value": 2 + } + ] + } + ] + }, + { + "subject_name": "link", + "weight": 20, + "tests": [ + { + "file": "index.html", + "name": "check_css_linked" + }, + { + "file": "index.html", + "name": "check_internal_links_to_article", + "parameters": [ + { + "name": "required_count", + "value": 4 + } + ] + } + ] + } + ] + }, + { + "subject_name": "css", + "weight": 40, + "subjects": [ + { + "subject_name": "responsivity", + "weight": 50, + "tests": [ + { + "file": "css/styles.css", + "name": "uses_relative_units" + }, + { + "file": "css/styles.css", + "name": "check_media_queries" + }, + { + "file": "css/styles.css", + "name": "check_flexbox_usage" + } + ] + }, + { + "subject_name": "style", + "weight": 50, + "tests": 
[ + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "font-size" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "font-family" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "text-align" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "display" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "position" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "margin" + }, + { + "name": "count", + "value": 1 + } + ] + }, + { + "file": "css/styles.css", + "name": "has_style", + "parameters": [ + { + "name": "style", + "value": "padding" + }, + { + "name": "count", + "value": 1 + } + ] + } + ] + } + ] + } + ] + }, + "bonus": { + "weight": 40, + "subjects": [ + { + "subject_name": "accessibility", + "weight": 20, + "tests": [ + { + "file": "index.html", + "name": "check_all_images_have_alt" + } + ] + }, + { + "subject_name": "head_detail", + "weight": 80, + "tests": [ + { + "file": "index.html", + "name": "check_head_details", + "parameters": [ + { + "name": "detail_tag", + "value": "title" + } + ] + }, + { + "file": "index.html", + "name": "check_head_details", + "parameters": [ + { + "name": "detail_tag", + "value": "meta" + } + ] + }, + { + "file": "index.html", + "name": "check_attribute_and_value", + "parameters": [ + { + "name": "tag", + "value": "meta" + }, + { + "name": "attribute", + "value": "charset" + }, + { + "name": "value", + "value": "UTF-8" + } + ] + }, + { + "file": "index.html", + "name": "check_attribute_and_value", + "parameters": [ + { + "name": "tag", + "value": "meta" + }, + { + "name": "attribute", + "value": "name" + }, + { + "name": "value", + "value": "viewport" + } + ] + }, + { + "file": "index.html", + "name": "check_attribute_and_value", + "parameters": [ + { + "name": "tag", + "value": "meta" + }, + { + "name": "attribute", + "value": "name" + }, + { + "name": "value", + "value": "description" + } + ] + }, + { + "file": "index.html", + "name": "check_attribute_and_value", + "parameters": [ + { + "name": "tag", + "value": "meta" + }, + { + "name": "attribute", + "value": "name" + }, + { + "name": "value", + "value": "author" + } + ] + }, + { + "file": "index.html", + "name": "check_attribute_and_value", + "parameters": [ + { + "name": "tag", + "value": "meta" + }, + { + "name": "attribute", + "value": "name" + }, + { + "name": "value", + "value": "keywords" + } + ] + } + ] + } + ] + }, + "penalty": { + "weight": 50, + "subjects": [ + { + "subject_name": "html", + "weight": 50, + "tests": [ + { + "file": "index.html", + "name": "check_bootstrap_usage" + }, + { + "file": "css/styles.css", + "name": "check_id_selector_over_usage", + "parameters": [ + { + "name": "max_allowed", + "value": 2 + } + ] + }, + { + "file": "index.html", + "name": "has_forbidden_tag", + "parameters": [ + { + "name": "tag", + "value": "script" + } + ] + }, + { + "file": "index.html", + "name": "check_html_direct_children" + }, + { + "file": "index.html", + "name": "check_tag_not_inside", + 
"parameters": [ + { + "name": "child_tag", + "value": "header" + }, + { + "name": "parent_tag", + "value": "main" + } + ] + }, + { + "file": "index.html", + "name": "check_tag_not_inside", + "parameters": [ + { + "name": "child_tag", + "value": "footer" + }, + { + "name": "parent_tag", + "value": "main" + } + ] + } + ] + }, + { + "subject_name": "project_structure", + "weight": 50, + "tests": [ + { + "file": "all", + "name": "check_dir_exists", + "parameters": [ + { + "name": "dir_path", + "value": "css" + } + ] + }, + { + "file": "all", + "name": "check_dir_exists", + "parameters": [ + { + "name": "dir_path", + "value": "imgs" + } + ] + }, + { + "file": "all", + "name": "check_project_structure", + "parameters": [ + { + "name": "expected_structure", + "value": "css/styles.css" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/docs/system/configuration/criteria_schema_v2.md b/docs/system/configuration/criteria_schema_v2.md new file mode 100644 index 0000000..0608e0a --- /dev/null +++ b/docs/system/configuration/criteria_schema_v2.md @@ -0,0 +1,306 @@ +# Criteria Schema Documentation + +## Overview + +The criteria schema defines the grading rubric for assignments. It uses a hierarchical structure with categories, subjects, and tests. + +## Schema Version: 2.0 (Current) + +### Key Changes from Version 1.0 + +1. **Subjects as Arrays**: Subjects are now arrays with explicit `subject_name` field (instead of dictionaries with implicit names as keys) +2. **Named Parameters**: Test parameters are now named objects `[{"name": "param", "value": "val"}]` (instead of positional arrays) +3. **No Calls Array**: Each test object represents one execution (no `calls` array) +4. **Template Library Field**: Root config includes optional `test_library` field + +## Schema Structure + +### Root Configuration + +```json +{ + "test_library": "web_dev", // Optional: name of test template to use + "base": { /* CategoryConfig */ }, // Required: base grading criteria + "bonus": { /* CategoryConfig */ }, // Optional: bonus points + "penalty": { /* CategoryConfig */ } // Optional: penalty points +} +``` + +### Category Configuration + +A category can contain either **subjects** OR **tests** (not both). + +```json +{ + "weight": 100, // Weight of this category (0-100) + "subjects": [ /* SubjectConfig[] */ ] // Array of subjects + // OR + "tests": [ /* TestConfig[] */ ] // Array of tests +} +``` + +### Subject Configuration + +A subject can contain either **nested subjects** OR **tests** (not both). 
+ +```json +{ + "subject_name": "html_structure", // Required: name of the subject + "weight": 40, // Weight of this subject (0-100) + "subjects": [ /* SubjectConfig[] */ ] // Array of nested subjects + // OR + "tests": [ /* TestConfig[] */ ] // Array of tests +} +``` + +### Test Configuration + +```json +{ + "name": "has_tag", // Required: test function name + "file": "index.html", // Optional: target file + "parameters": [ // Optional: named parameters + { + "name": "tag", + "value": "div" + }, + { + "name": "required_count", + "value": 5 + } + ] +} +``` + +## Complete Example + +```json +{ + "test_library": "web_dev", + "base": { + "weight": 100, + "subjects": [ + { + "subject_name": "html", + "weight": 60, + "subjects": [ + { + "subject_name": "structure", + "weight": 40, + "tests": [ + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + {"name": "tag", "value": "body"}, + {"name": "required_count", "value": 1} + ] + }, + { + "file": "index.html", + "name": "has_tag", + "parameters": [ + {"name": "tag", "value": "header"}, + {"name": "required_count", "value": 1} + ] + } + ] + }, + { + "subject_name": "links", + "weight": 20, + "tests": [ + { + "file": "index.html", + "name": "check_css_linked" + } + ] + } + ] + }, + { + "subject_name": "css", + "weight": 40, + "tests": [ + { + "file": "style.css", + "name": "has_style", + "parameters": [ + {"name": "property", "value": "margin"}, + {"name": "count", "value": 1} + ] + } + ] + } + ] + }, + "bonus": { + "weight": 20, + "subjects": [ + { + "subject_name": "accessibility", + "weight": 100, + "tests": [ + { + "file": "index.html", + "name": "check_all_images_have_alt" + } + ] + } + ] + }, + "penalty": { + "weight": 30, + "subjects": [ + { + "subject_name": "bad_practices", + "weight": 100, + "tests": [ + { + "file": "index.html", + "name": "has_forbidden_tag", + "parameters": [ + {"name": "tag", "value": "script"} + ] + } + ] + } + ] + } +} +``` + +## Validation Rules + +### Category Level +- Must have either `subjects` OR `tests` (not both, not neither) +- Weight must be between 0 and 100 +- If `subjects` is present, it must be a non-empty array + +### Subject Level +- Must have `subject_name` field +- Must have either `subjects` OR `tests` (not both, not neither) +- Weight must be between 0 and 100 +- If `subjects` is present, it must be a non-empty array + +### Test Level +- Must have `name` field (test function name) +- `file` is optional (some tests don't target specific files) +- `parameters` is optional (empty array or omitted means no parameters) +- Each parameter must have `name` and `value` fields + +### Weight Balancing +- Sibling subjects/tests have their weights automatically balanced to sum to 100 +- Example: If you have 3 subjects with weights [30, 40, 50], they'll be scaled to [25, 33.33, 41.67] + +## Parameter Handling + +Parameters are converted from named objects to positional arguments when calling test functions: + +```json +"parameters": [ + {"name": "tag", "value": "div"}, + {"name": "required_count", "value": 5} +] +``` + +Becomes: `test_function.execute("div", 5, files=submission_files)` + +The order of parameters in the array determines the order of positional arguments. 
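A minimal sketch of this conversion, assuming a hypothetical helper named `parameters_to_args` (the pipeline's actual code may structure this differently):

```python
# Sketch only: converts the schema's named parameter objects into the
# positional arguments passed to a test function's execute() call.
def parameters_to_args(parameters):
    """Preserve array order; each item contributes its "value" field."""
    return [param["value"] for param in (parameters or [])]


params = [
    {"name": "tag", "value": "div"},
    {"name": "required_count", "value": 5},
]

args = parameters_to_args(params)   # ["div", 5]
# The test would then be invoked roughly as:
#   test_function.execute(*args, files=submission_files)
```

Because the mapping is positional, the order of parameters in `criteria.json` must match the signature of the target test function.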
+ +## Special File Values + +- `"file": "index.html"` - Target specific file +- `"file": "all"` - Pass all submission files to test +- `"file": null` or omitted - No specific file target + +## Migration from Schema v1.0 + +### Old Format (v1.0) +```json +{ + "base": { + "weight": 100, + "subjects": { + "html_structure": { + "weight": 40, + "tests": [ + { + "name": "has_tag", + "file": "index.html", + "calls": [ + ["div", 5], + ["h1", 2] + ] + } + ] + } + } + } +} +``` + +### New Format (v2.0) +```json +{ + "test_library": "web_dev", + "base": { + "weight": 100, + "subjects": [ + { + "subject_name": "html_structure", + "weight": 40, + "tests": [ + { + "name": "has_tag", + "file": "index.html", + "parameters": [ + {"name": "tag", "value": "div"}, + {"name": "required_count", "value": 5} + ] + }, + { + "name": "has_tag", + "file": "index.html", + "parameters": [ + {"name": "tag", "value": "h1"}, + {"name": "required_count", "value": 2} + ] + } + ] + } + ] + } +} +``` + +### Key Differences +1. Each test execution is now a separate test object (no `calls` array) +2. Subjects use array format with `subject_name` field +3. Parameters are named objects instead of positional arrays +4. Added optional `test_library` field at root + +## Best Practices + +1. **Clear Naming**: Use descriptive `subject_name` values +2. **Logical Grouping**: Group related tests under subjects +3. **Weight Distribution**: Assign weights based on importance +4. **Parameter Names**: Use clear parameter names that match test function signatures +5. **File Organization**: Specify file paths relative to submission root + +## Pydantic Models + +The schema is validated using Pydantic models: + +- `CriteriaConfig` - Root configuration +- `CategoryConfig` - Category (base/bonus/penalty) +- `SubjectConfig` - Subject node +- `TestConfig` - Test configuration +- `ParameterConfig` - Named parameter + +These models provide: +- Automatic validation +- Type checking +- Helpful error messages +- IDE autocomplete support + diff --git a/docs/system/creating_assignments.md b/docs/system/creating_assignments.md index 95970b7..ebd7639 100644 --- a/docs/system/creating_assignments.md +++ b/docs/system/creating_assignments.md @@ -128,7 +128,8 @@ If you need complete control over grading logic, you can create a `template.py` ```python from autograder.builder.models.template import Template from autograder.builder.models.test_function import TestFunction -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult + # =============================================================== # region: TestFunction Implementations @@ -138,18 +139,18 @@ class HasRequiredFile(TestFunction): @property def name(self): return "has_required_file" - + @property def description(self): return "Checks if a required file exists in the submission" - + @property def parameter_description(self): return { "file_path": "Path to the required file", "file_name": "Name of the required file" } - + def execute(self, file_path: str, file_name: str) -> TestResult: import os exists = os.path.exists(file_path) @@ -162,18 +163,18 @@ class CheckMinimumLines(TestFunction): @property def name(self): return "check_minimum_lines" - + @property def description(self): return "Checks if a file has at least a minimum number of lines" - + @property def parameter_description(self): return { "file_content": "Content of the file to check", "min_lines": "Minimum number of lines required" } - + def execute(self, file_content: str, 
min_lines: int) -> TestResult: lines = file_content.strip().split('\n') actual_lines = len([line for line in lines if line.strip()]) @@ -190,37 +191,37 @@ class CustomAssignmentTemplate(Template): """ A custom template for a specific assignment. """ - + @property def template_name(self): return "Custom Assignment Template" - + @property def template_description(self): return "Custom grading template for specific assignment requirements" - + @property def requires_execution_helper(self) -> bool: return False - + @property def execution_helper(self): return None - + @property def requires_pre_executed_tree(self) -> bool: return False - + def __init__(self): self.tests = { "has_required_file": HasRequiredFile(), "check_minimum_lines": CheckMinimumLines(), # Add more custom tests here } - + def stop(self): pass - + def get_test(self, name: str) -> TestFunction: """ Retrieves a specific test function instance from the template. @@ -424,10 +425,11 @@ submission/ ```python from autograder.builder.models.template import Template from autograder.builder.models.test_function import TestFunction -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult from bs4 import BeautifulSoup import re + # =============================================================== # region: TestFunction Implementations # =============================================================== @@ -436,28 +438,28 @@ class CheckResponsiveImages(TestFunction): @property def name(self): return "check_responsive_images" - + @property def description(self): return "Checks if images use responsive attributes" - + @property def parameter_description(self): return { "html_content": "The HTML content to analyze", "min_count": "Minimum number of responsive images required" } - + def execute(self, html_content: str, min_count: int) -> TestResult: soup = BeautifulSoup(html_content, 'html.parser') images = soup.find_all('img') responsive_count = 0 - + for img in images: # Check for responsive attributes if img.get('srcset') or 'responsive' in img.get('class', []): responsive_count += 1 - + score = min(100, int((responsive_count / min_count) * 100)) if min_count > 0 else 100 report = f"Found {responsive_count} of {min_count} required responsive images." return TestResult(self.name, score, report, parameters={"min_count": min_count}) @@ -467,23 +469,23 @@ class CheckMediaQueries(TestFunction): @property def name(self): return "check_media_queries" - + @property def description(self): return "Checks if CSS contains media queries for responsive design" - + @property def parameter_description(self): return { "css_content": "The CSS content to analyze", "min_breakpoints": "Minimum number of breakpoints required" } - + def execute(self, css_content: str, min_breakpoints: int) -> TestResult: pattern = r'@media\s*\([^)]+\)' matches = re.findall(pattern, css_content) breakpoint_count = len(matches) - + score = min(100, int((breakpoint_count / min_breakpoints) * 100)) if min_breakpoints > 0 else 100 report = f"Found {breakpoint_count} of {min_breakpoints} required media queries." return TestResult(self.name, score, report, parameters={"min_breakpoints": min_breakpoints}) @@ -497,36 +499,36 @@ class ResponsiveLandingPageTemplate(Template): """ Custom template for responsive landing page assignment. 
""" - + @property def template_name(self): return "Responsive Landing Page Template" - + @property def template_description(self): return "Evaluates responsive design implementation in landing pages" - + @property def requires_execution_helper(self) -> bool: return False - + @property def execution_helper(self): return None - + @property def requires_pre_executed_tree(self) -> bool: return False - + def __init__(self): self.tests = { "check_responsive_images": CheckResponsiveImages(), "check_media_queries": CheckMediaQueries(), } - + def stop(self): pass - + def get_test(self, name: str) -> TestFunction: """ Retrieves a specific test function instance from the template. diff --git a/docs/system/execution/execution_helpers.md b/docs/system/execution/execution_helpers.md index 24171f5..36bc708 100644 --- a/docs/system/execution/execution_helpers.md +++ b/docs/system/execution/execution_helpers.md @@ -56,7 +56,7 @@ Important details: Usage example (conceptual): ```py -from autograder.builder.execution_helpers.AI_Executor import AiExecutor +from autograder.utils.executors.ai_executor import AiExecutor ai = AiExecutor() ai.send_submission_files({'main.py': 'print("hi")'}) @@ -147,7 +147,7 @@ Important configuration: Usage example (conceptual): ```py -from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor +from autograder.utils.executors.sandbox_executor import SandboxExecutor # Create and start from request context sandbox = SandboxExecutor.start() diff --git a/docs/system/pipeline_tree_logic.md b/docs/system/pipeline_tree_logic.md new file mode 100644 index 0000000..c585ce3 --- /dev/null +++ b/docs/system/pipeline_tree_logic.md @@ -0,0 +1,324 @@ +# Pipeline Logic for Criteria Tree and Tree Building + +## Overview + +The pipeline implements conditional logic to optimize grading based on the number of submissions. This design eliminates unnecessary tree construction overhead when grading single submissions while maintaining efficient batch processing for multiple submissions. + +## Key Concepts + +### Why Two Paths? + +**Single Submission Path**: When grading only one submission, building a criteria tree and then traversing it is redundant. We can directly process the criteria configuration and build the result tree in one pass. 
+ +**Multiple Submissions Path**: When grading multiple submissions, the criteria tree becomes valuable because: +- The tree structure is built once and reused for all submissions +- Reduces redundant parsing and validation +- Improves overall performance through tree reuse + +## Pipeline Flow Diagram + +### Single Submission Path (Optimized) +``` +┌─────────────────────┐ +│ Criteria Config │ +│ (JSON/Dict) │ +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ GradeStep │ +│ grade_from_config() │ +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ ResultTree │ +│ (Final Score) │ +└─────────────────────┘ +``` + +### Multiple Submissions Path (Tree-Based) +``` +┌─────────────────────┐ +│ Criteria Config │ +│ (JSON/Dict) │ +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ BuildTreeStep │ +│ (Build Once) │ +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ CriteriaTree │ +│ (Reusable) │ +└──────────┬──────────┘ + │ + ▼ + ┌──────┴──────┐ + │ For Each │ + │ Submission │ + └──────┬──────┘ + │ + ▼ +┌─────────────────────┐ +│ GradeStep │ +│ grade_from_tree() │ +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ ResultTree │ +│ (Per Submission) │ +└─────────────────────┘ +``` + +## Step Implementations + +### BuildTreeStep + +**Responsibility**: Construct hierarchical criteria tree from configuration + +**Input**: +- Criteria configuration (dict) +- Template instance +- Submission files (for validation) + +**Output**: +- `CriteriaTree` object (fully built with test functions resolved) + +**When Executed**: +- Only when `len(submissions) > 1` + +**Key Features**: +- Validates JSON schema using Pydantic models +- Resolves test functions from template library +- Stores test parameters and file references +- Builds complete tree structure with weights + +### GradeStep + +**Responsibility**: Execute grading and produce result tree + +**Input Detection Logic**: +```python +if isinstance(input_data, CriteriaTree): + # Use tree-based grading + result = grader_service.grade_from_tree(criteria_tree, submission) +else: + # Use config-based grading + result = grader_service.grade_from_config(criteria_config, template, submission) +``` + +**Two Grading Methods**: + +1. **`grade_from_config()`** - Single submission optimization + - Directly processes criteria configuration + - Builds result tree while executing tests + - Single-pass algorithm (no tree pre-construction) + +2. **`grade_from_tree()`** - Multiple submission efficiency + - Traverses pre-built criteria tree + - Executes tests from tree nodes + - Builds result tree from criteria tree structure + +**Output**: +- `ResultTree` object with scores and feedback + +## Pipeline Configuration Logic + +### Automatic Path Selection + +```python +def configure_pipeline(submissions: List[Submission], criteria_config: dict): + """ + Automatically configures pipeline based on submission count. 
+ """ + if len(submissions) == 1: + # Single submission: Skip tree building + return [ + PreFlightStep(), + LoadTemplateStep(), + GradeStep(), # Uses grade_from_config + FeedbackStep(), + ExportStep() + ] + else: + # Multiple submissions: Build tree once, reuse + return [ + PreFlightStep(), + LoadTemplateStep(), + BuildTreeStep(), # Build criteria tree + GradeStep(), # Uses grade_from_tree + FeedbackStep(), + ExportStep() + ] +``` + +## Data Flow Example + +### Single Submission Example + +**Input**: +```json +{ + "criteria": { + "name": "HTML Assignment", + "tests": [ + {"name": "check_title", "weight": 50}, + {"name": "check_header", "weight": 50} + ] + }, + "submissions": [ + {"files": ["index.html"]} + ] +} +``` + +**Flow**: +1. Criteria config loaded as dict +2. GradeStep detects dict input +3. Calls `grade_from_config(criteria, template, submission)` +4. Executes tests and builds result tree simultaneously +5. Returns final result tree + +### Multiple Submissions Example + +**Input**: +```json +{ + "criteria": { /* same as above */ }, + "submissions": [ + {"files": ["index.html"]}, + {"files": ["index.html"]}, + {"files": ["index.html"]} + ] +} +``` + +**Flow**: +1. Criteria config loaded as dict +2. BuildTreeStep creates `CriteriaTree` (once) +3. For each submission: + - GradeStep detects `CriteriaTree` input + - Calls `grade_from_tree(tree, submission)` + - Executes tests from tree + - Returns result tree for that submission +4. Collects all result trees + +## Performance Implications + +### Single Submission +- **Avoided Overhead**: No tree construction/traversal +- **Memory**: Lower (no tree object created) +- **Speed**: Faster for single grading +- **Complexity**: O(n) where n = number of tests + +### Multiple Submissions +- **Tree Construction**: One-time cost +- **Per-Submission**: Fast traversal (reuse structure) +- **Memory**: Higher (tree persists) +- **Speed**: Faster overall for batch processing +- **Complexity**: O(t + n*m) where t = tree building, n = submissions, m = tests + +## Error Handling + +### BuildTreeStep Errors +- Missing test functions in template +- Invalid JSON schema +- Malformed criteria structure +- **Result**: Pipeline fails early (before grading) + +### GradeStep Errors +- Test execution failures +- File access issues +- Runtime errors in test functions +- **Result**: Captured in ResultTree as test failures + +## Type Safety + +The GradeStep uses robust type checking to determine the grading method: + +```python +from autograder.models.criteria_tree import CriteriaTree + +# Type checking +if isinstance(input_data, CriteriaTree): + # Definitely a tree + use_grade_from_tree() +elif isinstance(input_data, dict): + # Configuration dictionary + use_grade_from_config() +else: + # Error: unexpected input type + raise TypeError("Invalid input type for GradeStep") +``` + +## Benefits of This Architecture + +### 1. Performance Optimization +- Single submissions: No unnecessary tree overhead +- Multiple submissions: Efficient tree reuse + +### 2. Flexibility +- Same pipeline handles both scenarios +- Automatic path selection based on input + +### 3. Maintainability +- Clear separation of concerns +- Each step has single responsibility +- Easy to modify or extend + +### 4. Consistency +- Both paths produce identical `ResultTree` output +- Same scoring algorithm regardless of path +- Unified error handling + +### 5. 
Testability +- Each grading method can be tested independently +- Clear input/output contracts +- Easier to debug issues + +## Migration from Old Architecture + +### Old Approach Problems +- Pre-executed trees (confusing concept) +- AI Executor as lazy-loading proxy (complex) +- Multiple traversals (inefficient) +- Mixed responsibilities + +### New Approach Solutions +- ✅ Single clear tree type: `CriteriaTree` +- ✅ Result tree built during grading +- ✅ Optional tree building (conditional) +- ✅ Clear step responsibilities +- ✅ Batch optimization handled separately + +## Future Enhancements + +### Potential Optimizations +1. **Parallel Execution**: Grade multiple submissions in parallel +2. **Caching**: Cache template loading across requests +3. **Streaming**: Stream results as they complete +4. **Incremental Results**: Return partial results for long-running grades + +### AI Executor Integration +For AI-based tests (e.g., essay grading): +- Collect all AI tests during tree traversal +- Batch API calls (single request) +- Map results back to result tree nodes +- Minimize API latency impact + +## Conclusion + +The pipeline's conditional tree-building logic provides an optimal balance between simplicity and performance. By detecting submission count and automatically choosing the appropriate path, we achieve: + +- **Fast single-submission grading** (no tree overhead) +- **Efficient batch processing** (tree reuse) +- **Clean architecture** (clear separation) +- **Type-safe execution** (runtime validation) + +This design sets a solid foundation for scaling the autograder system while maintaining code clarity and performance. + diff --git a/pipeline_modes.py b/pipeline_modes.py new file mode 100644 index 0000000..67f7851 --- /dev/null +++ b/pipeline_modes.py @@ -0,0 +1,175 @@ +""" +Simple test script for grading an HTML assignment using the autograder pipeline. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from autograder.autograder import build_pipeline +from autograder.models.dataclass.submission import Submission, SubmissionFile + + +def create_mock_html_submission(): + """Create a mock HTML submission for testing.""" + html_content = """ + + + + + Student Portfolio + + + +
+    <header class="container">
+        <h1>John Doe - Portfolio</h1>
+    </header>
+
+    <nav class="row">
+        <a href="#welcome">Welcome</a>
+        <a href="#about">About Me</a>
+    </nav>
+
+    <main class="container">
+        <div class="row" id="welcome">
+            <h1>Welcome</h1>
+            <p>This is my portfolio website showcasing my work.</p>
+            <p>I'm a passionate developer with experience in web technologies.</p>
+            <p>Check out my projects below!</p>
+        </div>
+
+        <div class="row" id="about">
+            <h2>About Me</h2>
+            <p>I love coding and creating amazing web experiences.</p>
+        </div>
+    </main>
+
+    <footer class="container">
+    </footer>
+ + + + + +""" + + submission_file = SubmissionFile( + filename="index.html", + content=html_content + ) + + submission = Submission( + username="student123", + user_id=12345, + assignment_id=1, + submission_files=[submission_file] + ) + + return submission + + +def create_mock_grading_criteria(): + """Create mock grading criteria for HTML assignment.""" + return { + "base": { + "weight": 100, + "subjects": { + "html_structure": { + "weight": 40, + "tests": [ + { + "name": "has_tag", + "file": "index.html", + "calls": [ + ["div", 5], + ["h1", 2], + ["p", 3], + ["a", 2] + ] + } + ] + }, + "css_styling": { + "weight": 30, + "tests": [ + { + "name": "has_class", + "file": "index.html", + "calls": [ + [["container", "row", "col-*"], 10] + ] + } + ] + } + } + } + } + + +def create_mock_feedback_config(): + """Create mock feedback configuration.""" + return { + "general": { + "report_title": "Web Development Assignment Feedback", + "show_score": True, + "show_passed_tests": False, + "add_report_summary": True + }, + "default": { + "category_headers": { + "base": "Core Web Development Requirements", + "html_structure": "HTML Structure & Semantics", + "css_styling": "CSS Styling & Design" + } + } + } + + +def html_grading_pipeline(): + """Test the autograder pipeline with HTML assignment.""" + print("\n" + "="*70) + print("HTML ASSIGNMENT GRADING TEST") + print("="*70 + "\n") + + # Create mock data + print("📄 Creating mock HTML submission...") + submission = create_mock_html_submission() + + print("⚙️ Creating grading criteria...") + grading_criteria = create_mock_grading_criteria() + + print("📋 Creating feedback configuration...") + feedback_config = create_mock_feedback_config() + + # Build the pipeline + print("🔧 Building autograder pipeline...") + pipeline = build_pipeline( + template_name="webdev", + include_feedback=False, # Set to True to include feedback generation + grading_criteria=grading_criteria, + feedback_config=feedback_config, + setup_config=None, + custom_template=None, + feedback_mode=None, + submission_files={sf.filename: sf.content for sf in submission.submission_files} + ) + + print("✅ Pipeline built successfully!\n") + print("Pipeline steps:") + for i, step in enumerate(pipeline._steps, 1): + print(f" {i}. {step.__class__.__name__}") + + print("\n" + "="*70) + print("Pipeline is ready. 
You can now implement the rest!") + print("="*70 + "\n") + + return pipeline + + +if __name__ == "__main__": + pipeline = html_grading_pipeline() + + pipeline.run(create_mock_html_submission()) + diff --git a/requirements.txt b/requirements.txt index 6639fa8..b856c04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,10 +8,10 @@ openai==1.93.0 requests~=2.32.4 beautifulsoup4~=4.13.4 dotenv~=0.9.9 -pydantic +pydantic~=2.12.5 python-dotenv~=1.1.1 upstash-redis==1.4.0 fastapi~=0.115.0 uvicorn[standard]~=0.32.0 python-multipart -docker~=7.1.0 \ No newline at end of file +docker~=7.1.0 diff --git a/tests/data/curl_examples.sh b/tests/data/curl_examples.sh old mode 100755 new mode 100644 diff --git a/tests/data/custom_template/criteria.json b/tests/data/custom_template/criteria.json index 4c8485c..e69de29 100644 --- a/tests/data/custom_template/criteria.json +++ b/tests/data/custom_template/criteria.json @@ -1,24 +0,0 @@ -{ - "base": { - "weight": 100, - "subjects": { - "custom_tests": { - "weight": 100, - "tests": [ - { - "name": "check_file_exists", - "calls": [ - ["main.py"] - ] - }, - { - "name": "check_function_exists", - "calls": [ - ["greet"] - ] - } - ] - } - } - } -} diff --git a/tests/data/custom_template/custom_template.py b/tests/data/custom_template/custom_template.py index 18f69df..23ce232 100644 --- a/tests/data/custom_template/custom_template.py +++ b/tests/data/custom_template/custom_template.py @@ -1,7 +1,7 @@ from autograder.builder.models.template import Template from autograder.builder.models.test_function import TestFunction from autograder.builder.models.param_description import ParamDescription -from autograder.core.models.test_result import TestResult +from autograder.models.dataclass.test_result import TestResult from autograder.context import request_context @@ -29,7 +29,7 @@ def parameter_description(self): def execute(self, filename: str) -> TestResult: request = request_context.get_request() submission_files = request.submission_files - + if filename in submission_files: return TestResult( self.name, diff --git a/tests/data/custom_template/main.py b/tests/data/custom_template/main.py index 7c1b20f..e69de29 100644 --- a/tests/data/custom_template/main.py +++ b/tests/data/custom_template/main.py @@ -1,12 +0,0 @@ -def greet(name): - """Simple greeting function.""" - return f"Hello, {name}!" - - -def main(): - """Main function.""" - print(greet("World")) - - -if __name__ == "__main__": - main() diff --git a/tests/data/essay/criteria.json b/tests/data/essay/criteria.json index 4f53368..e69de29 100644 --- a/tests/data/essay/criteria.json +++ b/tests/data/essay/criteria.json @@ -1,25 +0,0 @@ -{ - "test_library": "essay", - "base": { - "weight": 100, - "subjects": { - "foundations": { - "weight": 60, - "tests": [ - { "file": "essay.txt", "name": "thesis_statement" }, - { "file": "essay.txt", "name": "clarity_and_cohesion" } - ] - }, - "prompt_adherence": { - "weight": 40, - "tests": [ - { - "file": "essay.txt", - "name": "adherence_to_prompt", - "calls": [["Discuss the impact of AI on the workforce."]] - } - ] - } - } - } -} diff --git a/tests/data/essay/essay.txt b/tests/data/essay/essay.txt index 99f928e..e69de29 100644 --- a/tests/data/essay/essay.txt +++ b/tests/data/essay/essay.txt @@ -1 +0,0 @@ -Artificial intelligence (AI) is reshaping the global economy and the modern workplace. While AI augments human capabilities and automates repetitive tasks, it also raises questions about job displacement, fairness, and the need for upskilling. 
To navigate this transition responsibly, educators and organizations must emphasize critical thinking, ethical use of AI, and collaboration between humans and intelligent systems. With thoughtful policy and training, AI can become a multiplier for productivity and creativity rather than a replacement for human potential. diff --git a/tests/data/essay/feedback.json b/tests/data/essay/feedback.json index 0bdac9a..e69de29 100644 --- a/tests/data/essay/feedback.json +++ b/tests/data/essay/feedback.json @@ -1,5 +0,0 @@ -{ - "style": "concise", - "include_suggestions": true, - "tone": "supportive" -} diff --git a/tests/data/input_output/criteria.json b/tests/data/input_output/criteria.json index b643bd7..e69de29 100644 --- a/tests/data/input_output/criteria.json +++ b/tests/data/input_output/criteria.json @@ -1,33 +0,0 @@ -{ - "base": { - "weight": 100, - "subjects": { - "basic_operations": { - "weight": 50, - "tests": [ - { - "name": "expect_output", - "calls": [ - [["add", "5", "3"], "8"], - [["subtract", "10", "4"], "6"], - [["multiply", "7", "6"], "42"] - ] - } - ] - }, - "edge_cases": { - "weight": 50, - "tests": [ - { - "name": "expect_output", - "calls": [ - [["add", "0", "0"], "0"], - [["multiply", "5", "0"], "0"], - [["divide", "10", "2"], "5.0"] - ] - } - ] - } - } - } -} diff --git a/tests/data/input_output/requirements.txt b/tests/data/input_output/requirements.txt index d949add..e69de29 100644 --- a/tests/data/input_output/requirements.txt +++ b/tests/data/input_output/requirements.txt @@ -1 +0,0 @@ -# No external dependencies required for this simple calculator diff --git a/tests/data/input_output/setup.json b/tests/data/input_output/setup.json index 553ee7d..e69de29 100644 --- a/tests/data/input_output/setup.json +++ b/tests/data/input_output/setup.json @@ -1,7 +0,0 @@ -{ - "runtime_image": "python:3.11-slim", - "start_command": "python calculator.py", - "commands": { - "install_dependencies": "pip install --no-cache-dir -r requirements.txt" - } -} diff --git a/tests/data/web_dev/criteria.json b/tests/data/web_dev/criteria.json index b243029..ab144a7 100644 --- a/tests/data/web_dev/criteria.json +++ b/tests/data/web_dev/criteria.json @@ -4,7 +4,8 @@ "subjects": { "html_structure": { "weight": 40, - "tests": [ + "tests": + [ { "name": "has_tag", "file": "index.html", diff --git a/tests/data/web_dev/style.css b/tests/data/web_dev/style.css index 646e36a..e69de29 100644 --- a/tests/data/web_dev/style.css +++ b/tests/data/web_dev/style.css @@ -1,44 +0,0 @@ -.container { - max-width: 1200px; - margin: 0 auto; - padding: 20px; -} - -.row { - display: flex; - gap: 20px; -} - -h1 { - color: #333333; - font-size: 32px; -} - -h2 { - color: #666666; - font-size: 24px; -} - -p { - color: #444444; - font-size: 16px; - line-height: 1.6; -} - -.card { - background: #f5f5f5; - padding: 15px; - border-radius: 8px; -} - -nav a { - color: #007bff; - text-decoration: none; - margin-right: 15px; -} - -footer { - margin-top: 40px; - border-top: 1px solid #ddd; - padding-top: 20px; -} diff --git a/tests/playroom/run_all_playrooms.py b/tests/playroom/run_all_playrooms.py index 17f66bb..e69de29 100644 --- a/tests/playroom/run_all_playrooms.py +++ b/tests/playroom/run_all_playrooms.py @@ -1,160 +0,0 @@ -""" -Run All Playrooms - -This script allows you to run all playrooms or individual ones for testing purposes. 
- -Usage: - python -m tests.playroom.run_all_playrooms # Run all playrooms - python -m tests.playroom.run_all_playrooms webdev # Run only webdev playroom - python -m tests.playroom.run_all_playrooms api essay # Run multiple playrooms - python -m tests.playroom.run_all_playrooms --list # List available playrooms -""" - -import sys -import argparse -from pathlib import Path - -# Add project root to path -project_root = Path(__file__).parent.parent.parent -sys.path.insert(0, str(project_root)) - - -# Import all playroom functions -from tests.playroom.webdev_playroom import run_webdev_playroom -from tests.playroom.api_playroom import run_api_playroom -from tests.playroom.essay_playroom import run_essay_playroom -from tests.playroom.io_playroom import run_io_playroom - - -# Map of playroom names to their runner functions -PLAYROOMS = { - "webdev": { - "name": "Web Development", - "runner": run_webdev_playroom, - "description": "Tests HTML/CSS grading with Bootstrap and custom classes" - }, - "api": { - "name": "API Testing", - "runner": run_api_playroom, - "description": "Tests REST API endpoints in a Docker container" - }, - "essay": { - "name": "Essay Grading", - "runner": run_essay_playroom, - "description": "Tests AI-powered essay grading (requires OpenAI API key)" - }, - "io": { - "name": "Input/Output", - "runner": run_io_playroom, - "description": "Tests command-line programs with stdin/stdout validation" - } -} - - -def list_playrooms(): - """Display all available playrooms.""" - print("\n" + "="*70) - print("AVAILABLE PLAYROOMS") - print("="*70 + "\n") - - for key, info in PLAYROOMS.items(): - print(f" {key:10} - {info['name']}") - print(f" {info['description']}") - print() - - print("="*70 + "\n") - - -def run_playroom(playroom_key: str): - """Run a specific playroom by its key.""" - if playroom_key not in PLAYROOMS: - print(f"❌ Error: Unknown playroom '{playroom_key}'") - print(f" Available playrooms: {', '.join(PLAYROOMS.keys())}") - return False - - try: - PLAYROOMS[playroom_key]["runner"]() - return True - except Exception as e: - print(f"\n❌ Error running {playroom_key} playroom: {e}") - import traceback - traceback.print_exc() - return False - - -def run_all(): - """Run all playrooms sequentially.""" - print("\n" + "#"*70) - print("# RUNNING ALL PLAYROOMS") - print("#"*70 + "\n") - - results = {} - for key in PLAYROOMS.keys(): - success = run_playroom(key) - results[key] = success - print("\n" + "-"*70 + "\n") - - # Summary - print("\n" + "="*70) - print("SUMMARY") - print("="*70 + "\n") - - for key, success in results.items(): - status = "✅ SUCCESS" if success else "❌ FAILED" - print(f" {PLAYROOMS[key]['name']:20} - {status}") - - total = len(results) - passed = sum(1 for s in results.values() if s) - print(f"\n Total: {passed}/{total} playrooms completed successfully") - print("\n" + "="*70 + "\n") - - -def main(): - """Main entry point for the playroom runner.""" - parser = argparse.ArgumentParser( - description="Run autograder playrooms for testing", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - python -m tests.playroom.run_all_playrooms # Run all playrooms - python -m tests.playroom.run_all_playrooms webdev # Run only webdev - python -m tests.playroom.run_all_playrooms api essay # Run multiple - python -m tests.playroom.run_all_playrooms --list # List available - """ - ) - - parser.add_argument( - 'playrooms', - nargs='*', - help='Specific playrooms to run (e.g., webdev api). If none specified, runs all.' 
- ) - - parser.add_argument( - '--list', - action='store_true', - help='List all available playrooms' - ) - - args = parser.parse_args() - - # Handle --list flag - if args.list: - list_playrooms() - return - - # If no playrooms specified, run all - if not args.playrooms: - run_all() - return - - # Run specified playrooms - print(f"\n🎮 Running {len(args.playrooms)} playroom(s)...\n") - for playroom_key in args.playrooms: - run_playroom(playroom_key) - if len(args.playrooms) > 1: - print("\n" + "-"*70 + "\n") - - -if __name__ == "__main__": - main() - diff --git a/tests/unit/builder/__init__.py b/tests/unit/builder/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/builder/test_tree.py b/tests/unit/builder/test_tree.py deleted file mode 100644 index 3db3717..0000000 --- a/tests/unit/builder/test_tree.py +++ /dev/null @@ -1,165 +0,0 @@ -import unittest -# Assuming your tree builder and models are in this path -from autograder.builder.tree_builder import CriteriaTree, Criteria, Subject, Test, TestCall - -class TestCriteriaTree(unittest.TestCase): - - def test_empty_config(self): - """ - Tests that building a tree from an empty config results in an empty Criteria object. - """ - config = {} - criteria = CriteriaTree.build(config) - self.assertIsInstance(criteria, Criteria) - self.assertEqual(len(criteria.base.subjects), 0) - - def test_invalid_subject(self): - """ - Tests that a ValueError is raised if a subject has both 'tests' and 'subjects'. - """ - config = { - "base": { - "subjects": { - "invalid_subject": { - "tests": [{"file": "index.html", "name": "some_test"}], - "subjects": {"sub_subject": {}} - } - } - } - } - with self.assertRaises(ValueError): - CriteriaTree.build(config) - - def test_weight_balancing(self): - """ - Tests that the weights of sibling subjects are correctly balanced to sum to 100. - """ - config = { - "base": { - "subjects": { - "html": {"weight": 60, "tests": []}, - "css": {"weight": 40, "tests": []} - } - }, - "bonus": { - "weight": 50, - "subjects": { - # These weights (10 + 10 = 20) will be scaled to sum to 100 - "accessibility": {"weight": 10, "tests": []}, - "performance": {"weight": 10, "tests": []} - } - } - } - criteria = CriteriaTree.build(config) - - # Check base subjects (already sum to 100) - self.assertAlmostEqual(criteria.base.subjects["html"].weight, 60) - self.assertAlmostEqual(criteria.base.subjects["css"].weight, 40) - - # Check bonus subjects (should be scaled: 10/20 -> 50, 10/20 -> 50) - self.assertAlmostEqual(criteria.bonus.subjects["accessibility"].weight, 50) - self.assertAlmostEqual(criteria.bonus.subjects["performance"].weight, 50) - self.assertEqual(criteria.bonus.max_score, 50) - - def test_structure_and_defaults_with_new_format(self): - """ - Tests the overall structure with the new explicit test format. 
- """ - config = { - "base": { - "subjects": { - "html": { - "tests": [ - # Test with no calls - {"file": "index.html", "name": "test1"}, - # Test with calls - { - "file": "index.html", - "name": "test2", - "calls": [["arg1", 1], ["arg2"]] - }, - # Simple string test (should get a default file) - "test3" - ] - } - } - }, - "penalty": {"weight": 75} - } - criteria = CriteriaTree.build(config) - - # Test category weights - self.assertEqual(criteria.penalty.max_score, 75) - self.assertEqual(criteria.bonus.max_score, 0) # Default - - # Test subject structure - self.assertIn("html", criteria.base.subjects) - html_subject = criteria.base.subjects["html"] - self.assertIsInstance(html_subject, Subject) - #self.assertEqual(html_subject.weight, 100) # Default weight when it's the only subject - - # Test tests structure - self.assertEqual(len(html_subject.tests), 3) - - # Find and verify test1 - test1 = next(t for t in html_subject.tests if t.name == "test1") - self.assertEqual(test1.file, "index.html") - self.assertEqual(len(test1.calls), 1) - self.assertEqual(test1.calls[0].args, []) - - # Find and verify test2 - test2 = next(t for t in html_subject.tests if t.name == "test2") - self.assertEqual(test2.file, "index.html") - self.assertEqual(len(test2.calls), 2) - self.assertEqual(test2.calls[0].args, ["arg1", 1]) - self.assertEqual(test2.calls[1].args, ["arg2"]) - - # Find and verify test3 (simple string) - test3 = next(t for t in html_subject.tests if t.name == "test3") - self.assertEqual(test3.file, "index.html") # Check default file assignment - self.assertEqual(len(test3.calls), 1) - self.assertEqual(test3.calls[0].args, []) - - def test_complex_weight_balancing(self): - """ - Tests weight balancing with a more complex, nested subject structure. - """ - config = { - "base": { - "subjects": { - "frontend": { - "weight": 75, - "subjects": { - "html": {"weight": 50, "tests": []}, - "css": {"weight": 50, "tests": []} - } - }, - "backend": { - "weight": 25, - "subjects": { - # These weights (10 + 30 = 40) will be scaled to sum to 100 - "database": {"weight": 10, "tests": []}, - "api": {"weight": 30, "tests": []} - } - } - } - } - } - criteria = CriteriaTree.build(config) - - # Top-level subjects should not be re-balanced as they sum to 100 - self.assertAlmostEqual(criteria.base.subjects["frontend"].weight, 75) - self.assertAlmostEqual(criteria.base.subjects["backend"].weight, 25) - - # Nested subjects in 'frontend' are already balanced - frontend = criteria.base.subjects["frontend"] - self.assertAlmostEqual(frontend.subjects["html"].weight, 50) - self.assertAlmostEqual(frontend.subjects["css"].weight, 50) - - # Nested subjects in 'backend' should be re-balanced - backend = criteria.base.subjects["backend"] - self.assertAlmostEqual(backend.subjects["database"].weight, 25) # 10/40 -> 25 - self.assertAlmostEqual(backend.subjects["api"].weight, 75) # 30/40 -> 75 - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/core/reporter/__init__.py b/tests/unit/core/reporter/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/core/reporter/test_ai_reporter.py b/tests/unit/core/reporter/test_ai_reporter.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/core/reporter/test_default_reporter.py b/tests/unit/core/reporter/test_default_reporter.py deleted file mode 100644 index 2e9d94b..0000000 --- 
a/tests/unit/core/reporter/test_default_reporter.py +++ /dev/null @@ -1,141 +0,0 @@ -import unittest -from autograder.core.report.default_reporter import DefaultReporter -from autograder.core.models.result import Result -from autograder.core.models.test_result import TestResult -from autograder.core.models.feedback_preferences import FeedbackPreferences - - -class TestDefaultReporter(unittest.TestCase): - - def setUp(self): - """Set up a mock Result and FeedbackPreferences object for testing.""" - - # Create a variety of test results for different scenarios - base_results = [ - TestResult("passing_base_test", 100, "Base test passed.", "html"), - TestResult("failing_base_test", 0, "Base test failed.", "css", {"file": "style.css"}) - ] - bonus_results = [ - TestResult("passing_bonus_test", 100, "Bonus achieved!", "javascript"), - TestResult("failing_bonus_test", 50, "Bonus partially met.", "accessibility") - ] - penalty_results = [ - TestResult("passing_penalty_test", 100, "Penalty avoided.", "html_validation"), - TestResult("failing_penalty_test", 0, "Penalty applied for malpractice.", "js_malpractice") - ] - - self.mock_result = Result( - final_score=75.5, - author="Jane Doe", - submission_file={"index.html": ""}, - base_results=base_results, - bonus_results=bonus_results, - penalty_results=penalty_results - ) - - # Create custom feedback preferences - feedback_config = { - "general": { - "report_title": "Test Report", - "show_passed_tests": True, - "add_report_summary": True, - "online_content": [{ - "url": "http://example.com/css-guide", - "description": "CSS Best Practices", - "linked_tests": ["failing_base_test"] - }] - }, - "default": { - "category_headers": { - "base": "Core Requirements", - "bonus": "Extra Credit", - "penalty": "Areas for Improvement" - } - } - } - self.mock_feedback_prefs = FeedbackPreferences.from_dict(feedback_config) - - def test_report_header(self): - """Tests if the report header is generated correctly.""" - reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - self.assertIn("# Test Report", feedback) - self.assertIn("### Olá, **Jane Doe**! 👋", feedback) - self.assertIn("> **Nota Final:** **`75.50 / 100`**", feedback) - - def test_report_sections_and_content(self): - """ - Tests that each category section is correctly rendered based on feedback preferences. 
- """ - reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - # Check for custom headers - self.assertIn("## Core Requirements", feedback) - self.assertIn("## Extra Credit", feedback) - self.assertIn("## Areas for Improvement", feedback) - - # Base section should only show the failing test - self.assertIn("failing_base_test", feedback) - self.assertNotIn("passing_base_test", feedback) - - # Bonus section should only show the passing test (since show_passed_tests is True) - self.assertIn("passing_bonus_test", feedback) - self.assertNotIn("failing_bonus_test", feedback) - - # Penalty section should only show the failing (applied) penalty - self.assertIn("failing_penalty_test", feedback) - self.assertNotIn("passing_penalty_test", feedback) - - def test_parameter_formatting(self): - """Tests if test parameters are formatted correctly in the report.""" - reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - # Check for the formatted parameter string in the failing base test - self.assertIn("(Parâmetros: `file: 'style.css'`)", feedback) - - def test_summary_table_generation(self): - """Tests the generation of the summary table with correct entries.""" - reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - self.assertIn("### 📝 Resumo dos Pontos de Atenção", feedback) - # Should contain the failing base test and the failing penalty test - self.assertIn("| Revisar | `css` | `failing_base_test` (Parâmetros: `file: 'style.css'`) |", feedback) - self.assertIn("| Corrigir (Penalidade) | `js_malpractice` | `failing_penalty_test` |", feedback) - # Should NOT contain any passing tests - self.assertNotIn("passing_base_test", feedback.split("### 📝 Resumo dos Pontos de Atenção")[1]) - - def test_online_content_linking(self): - """Tests if suggested learning resources are correctly linked in the report.""" - reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - # The failing_base_test is linked to a resource, so it should be present - expected_link = "> 📚 **Recurso Sugerido:** [CSS Best Practices](http://example.com/css-guide)" - self.assertIn(expected_link, feedback) - - def test_no_issues_report(self): - """Tests the report format when all tests pass and no penalties are applied.""" - # Create a result object with only passing scores - all_passing_result = Result( - final_score=100.0, author="John Doe", submission_file={}, - base_results=[TestResult("p1", 100, "p", "s1")], - bonus_results=[TestResult("p2", 100, "p", "s2")], - penalty_results=[] - ) - reporter = DefaultReporter(all_passing_result, self.mock_feedback_prefs) - feedback = reporter.generate_feedback() - - # No category sections for base/penalty should be generated - self.assertNotIn("## Core Requirements", feedback) - self.assertNotIn("## Areas for Improvement", feedback) - - # Summary should show the success message - self.assertIn("Excelente trabalho! 
Nenhum ponto crítico de atenção foi encontrado.", feedback) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unit/core/test_grader.py b/tests/unit/core/test_grader.py deleted file mode 100644 index a1a1adb..0000000 --- a/tests/unit/core/test_grader.py +++ /dev/null @@ -1,177 +0,0 @@ -import unittest -from typing import Dict, List, Any - -# Assuming these classes are in your project structure -from autograder.builder.tree_builder import CriteriaTree, Criteria, Subject, Test, TestCall -from autograder.core.models.result import Result -from autograder.core.models.test_result import TestResult -from autograder.builder.models.template import Template -from autograder.builder.models.test_function import TestFunction -from autograder.core.grading.grader import Grader - -# =============================================================== -# Mock Template Library based on the new TestFunction model -# =============================================================== - -class PassingTest(TestFunction): - @property - def name(self): return "passing_test" - @property - def description(self): return "A mock test that always passes." - @property - def parameter_description(self): return {} - def execute(self, *args, **kwargs) -> TestResult: - return TestResult(self.name, 100, "This test always passes.") - -class FailingTest(TestFunction): - @property - def name(self): return "failing_test" - @property - def description(self): return "A mock test that always fails." - @property - def parameter_description(self): return {} - def execute(self, *args, **kwargs) -> TestResult: - return TestResult(self.name, 0, "This test always fails.") - -class PartialTest(TestFunction): - @property - def name(self): return "partial_test" - @property - def description(self): return "A mock test that gives partial credit." - @property - def parameter_description(self): return {} - def execute(self, *args, **kwargs) -> TestResult: - return TestResult(self.name, 50, "This test gives partial credit.") - -class MockTemplate(Template): - @property - def name(self): - return "Mock Library" - - def __init__(self): - self.tests = { - "passing_test": PassingTest(), - "failing_test": FailingTest(), - "partial_test": PartialTest(), - } - - def get_test(self, name: str) -> TestFunction: - return self.tests.get(name) - -# =============================================================== -# Updated Unit Test Class -# =============================================================== - -class TestGrader(unittest.TestCase): - - def setUp(self): - """ - Set up a common mock library and submission data for the tests. - """ - self.mock_library = MockTemplate() - self.submission_files = {"index.html": ""} - self.author_name = "Test Student" - - def test_basic_score_calculation(self): - """ - Tests the final score calculation with a mix of passing and failing tests. - """ - config = { - "base": { - "subjects": { - "html": { - "weight": 100, - "tests": [ - {"file": "index.html", "name": "passing_test"}, - {"file": "index.html", "name": "failing_test"} - ] - } - } - } - } - criteria = CriteriaTree.build(config) - grader = Grader(criteria, self.mock_library) - result = grader.run(self.submission_files, self.author_name) - # Average of tests: (100 + 0) / 2 = 50. Subject score = 50. - self.assertAlmostEqual(result.final_score, 50) - - def test_bonus_points_application(self): - """ - Tests that bonus points are correctly applied to the final score. 
- """ - config = { - "base": { - "subjects": {"html": {"tests": [{"file": "index.html", "name": "partial_test"}]}} - }, - "bonus": { - "weight": 20, # This is the max_score for the bonus category - "subjects": {"extra": {"tests": [{"file": "index.html", "name": "passing_test"}]}} - } - } - criteria = CriteriaTree.build(config) - grader = Grader(criteria, self.mock_library) - result = grader.run(self.submission_files, self.author_name) - - # Base score = 50. Bonus score = 100. - # Bonus points to add = (100 / 100) * 20 = 20. - # Final score = 50 + 20 = 70. - self.assertAlmostEqual(result.final_score, 70) - - def test_penalty_points_deduction(self): - """ - Tests that penalty points are correctly deducted from the final score. - A "failing" penalty test (score=0) means the penalty IS applied. - """ - config = { - "base": { - "subjects": {"html": {"tests": [{"file": "index.html", "name": "passing_test"}]}} - }, - "penalty": { - "weight": 30, # This is the max_score for the penalty category - "subjects": {"malpractice": {"tests": [{"file": "index.html", "name": "failing_test"}]}} - } - } - criteria = CriteriaTree.build(config) - grader = Grader(criteria, self.mock_library) - result = grader.run(self.submission_files, self.author_name) - - # Base score = 100. - # Penalty test failed (score=0), so 100% of the penalty is incurred. - # Penalty points to subtract = (100 / 100) * 30 = 30. - # Final score = 100 - 30 = 70. - self.assertAlmostEqual(result.final_score, 70) - - def test_complex_grading_with_nested_subjects(self): - """ - Tests the grader with a more complex, nested criteria tree with varying weights. - """ - config = { - "base": { - "subjects": { - "frontend": { - "weight": 80, - "subjects": { - "html": {"weight": 50, "tests": [{"file": "index.html", "name": "passing_test"}]}, # Score: 100 - "css": {"weight": 50, "tests": [{"file": "index.html", "name": "failing_test"}]} # Score: 0 - } - }, - "backend": { - "weight": 20, - "tests": [{"file": "index.html", "name": "partial_test"}] # Score: 50 - } - } - } - } - criteria = CriteriaTree.build(config) - grader = Grader(criteria, self.mock_library) - result = grader.run(self.submission_files, self.author_name) - - # Frontend score (weighted avg of children) = (100 * 0.5) + (0 * 0.5) = 50 - # Backend score = 50 - # Total base score (weighted avg of children) = (50 * 0.8) + (50 * 0.2) = 40 + 10 = 50 - self.assertAlmostEqual(result.final_score, 50) - self.assertIsInstance(result, Result) - self.assertEqual(len(grader.base_results), 3) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/tests/unit/templates/__init__.py b/tests/unit/templates/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/test_config_models.py b/tests/unit/test_config_models.py new file mode 100644 index 0000000..ce63a39 --- /dev/null +++ b/tests/unit/test_config_models.py @@ -0,0 +1,518 @@ +"""Test suite for configuration models to ensure they align with criteria_schema.json""" +import json +import pytest +from pathlib import Path + +from autograder.models.config.criteria import CriteriaConfig +from autograder.models.config.category import CategoryConfig +from autograder.models.config.subject import SubjectConfig +from autograder.models.config.test import TestConfig, ParameterConfig + + +@pytest.fixture +def criteria_schema_path(): + """Path to the criteria_schema.json file""" + return Path(__file__).parent.parent.parent / "critera_schema.json" + + +@pytest.fixture +def 
criteria_schema_dict(criteria_schema_path): + """Load criteria schema as dictionary""" + with open(criteria_schema_path, 'r') as f: + return json.load(f) + + +@pytest.fixture +def criteria_config(criteria_schema_dict): + """Parse criteria schema into CriteriaConfig object""" + return CriteriaConfig.from_dict(criteria_schema_dict) + + +class TestParameterConfig: + """Test ParameterConfig model""" + + def test_parameter_config_creation(self): + """Test creating a ParameterConfig""" + param = ParameterConfig(name="tag", value="body") + assert param.name == "tag" + assert param.value == "body" + + def test_parameter_config_with_int_value(self): + """Test ParameterConfig with integer value""" + param = ParameterConfig(name="required_count", value=1) + assert param.name == "required_count" + assert param.value == 1 + assert isinstance(param.value, int) + + def test_parameter_config_forbid_extra(self): + """Test that extra fields are forbidden""" + with pytest.raises(ValueError): + ParameterConfig(name="tag", value="body", extra_field="not allowed") + + +class TestTestConfig: + """Test TestConfig model""" + + def test_test_config_with_file_and_name(self): + """Test creating a TestConfig with file and name""" + test = TestConfig( + file="index.html", + name="has_tag", + parameters=[ + ParameterConfig(name="tag", value="body"), + ParameterConfig(name="required_count", value=1) + ] + ) + assert test.file == "index.html" + assert test.name == "has_tag" + assert len(test.parameters) == 2 + + def test_test_config_without_parameters(self): + """Test TestConfig without parameters""" + test = TestConfig(file="index.html", name="check_css_linked") + assert test.file == "index.html" + assert test.name == "check_css_linked" + assert test.parameters is None + + def test_get_args_list(self): + """Test converting parameters to args list""" + test = TestConfig( + file="index.html", + name="has_tag", + parameters=[ + ParameterConfig(name="tag", value="body"), + ParameterConfig(name="required_count", value=1) + ] + ) + args = test.get_args_list() + assert args == ["body", 1] + + def test_get_kwargs_dict(self): + """Test converting parameters to kwargs dict""" + test = TestConfig( + file="index.html", + name="has_tag", + parameters=[ + ParameterConfig(name="tag", value="body"), + ParameterConfig(name="required_count", value=1) + ] + ) + kwargs = test.get_kwargs_dict() + assert kwargs == {"tag": "body", "required_count": 1} + + def test_get_args_list_empty(self): + """Test get_args_list with no parameters""" + test = TestConfig(file="index.html", name="check_css_linked") + args = test.get_args_list() + assert args == [] + + def test_get_kwargs_dict_empty(self): + """Test get_kwargs_dict with no parameters""" + test = TestConfig(file="index.html", name="check_css_linked") + kwargs = test.get_kwargs_dict() + assert kwargs == {} + + +class TestSubjectConfig: + """Test SubjectConfig model""" + + def test_subject_config_with_tests(self): + """Test SubjectConfig with tests only""" + subject = SubjectConfig( + subject_name="structure", + weight=40, + tests=[ + TestConfig(file="index.html", name="has_tag", parameters=[ + ParameterConfig(name="tag", value="body") + ]) + ] + ) + assert subject.subject_name == "structure" + assert subject.weight == 40 + assert len(subject.tests) == 1 + assert subject.subjects is None + + def test_subject_config_with_nested_subjects(self): + """Test SubjectConfig with nested subjects""" + subject = SubjectConfig( + subject_name="html", + weight=60, + subjects=[ + SubjectConfig( + 
subject_name="structure", + weight=40, + tests=[TestConfig(file="index.html", name="has_tag")] + ) + ] + ) + assert subject.subject_name == "html" + assert subject.weight == 60 + assert len(subject.subjects) == 1 + assert subject.subjects[0].subject_name == "structure" + + def test_subject_config_validation_requires_tests_or_subjects(self): + """Test that SubjectConfig requires at least tests or subjects""" + with pytest.raises(ValueError, match="must have at least 'tests' or 'subjects'"): + SubjectConfig(subject_name="invalid", weight=50) + + def test_subject_config_with_both_tests_and_subjects_requires_subjects_weight(self): + """Test that having both tests and subjects requires subjects_weight""" + with pytest.raises(ValueError, match="needs 'subjects_weight' defined"): + SubjectConfig( + subject_name="mixed", + weight=50, + tests=[TestConfig(file="index.html", name="has_tag")], + subjects=[SubjectConfig( + subject_name="nested", + weight=30, + tests=[TestConfig(file="index.html", name="has_tag")] + )] + ) + + def test_subject_config_with_both_tests_and_subjects_with_weight(self): + """Test that having both tests and subjects works with subjects_weight""" + subject = SubjectConfig( + subject_name="mixed", + weight=50, + tests=[TestConfig(file="index.html", name="has_tag")], + subjects=[SubjectConfig( + subject_name="nested", + weight=30, + tests=[TestConfig(file="index.html", name="has_tag")] + )], + subjects_weight=60 + ) + assert subject.subject_name == "mixed" + assert subject.subjects_weight == 60 + + +class TestCategoryConfig: + """Test CategoryConfig model""" + + def test_category_config_with_subjects(self): + """Test CategoryConfig with subjects""" + category = CategoryConfig( + weight=100, + subjects=[ + SubjectConfig( + subject_name="html", + weight=60, + tests=[TestConfig(file="index.html", name="has_tag")] + ) + ] + ) + assert category.weight == 100 + assert len(category.subjects) == 1 + assert category.subjects[0].subject_name == "html" + + def test_category_config_validation_requires_tests_or_subjects(self): + """Test that CategoryConfig requires at least tests or subjects""" + with pytest.raises(ValueError, match="must have at least 'tests' or 'subjects'"): + CategoryConfig(weight=100) + + def test_category_config_with_both_tests_and_subjects_requires_subjects_weight(self): + """Test that having both tests and subjects requires subjects_weight""" + with pytest.raises(ValueError, match="needs 'subjects_weight' defined"): + CategoryConfig( + weight=100, + tests=[TestConfig(file="index.html", name="has_tag")], + subjects=[SubjectConfig( + subject_name="nested", + weight=30, + tests=[TestConfig(file="index.html", name="has_tag")] + )] + ) + + +class TestCriteriaConfig: + """Test CriteriaConfig model""" + + def test_criteria_config_basic(self): + """Test basic CriteriaConfig creation""" + criteria = CriteriaConfig( + test_library="web_dev", + base=CategoryConfig( + weight=100, + subjects=[ + SubjectConfig( + subject_name="html", + weight=60, + tests=[TestConfig(file="index.html", name="has_tag")] + ) + ] + ) + ) + assert criteria.test_library == "web_dev" + assert criteria.base.weight == 100 + assert criteria.bonus is None + assert criteria.penalty is None + + def test_criteria_config_with_all_categories(self): + """Test CriteriaConfig with base, bonus, and penalty""" + criteria = CriteriaConfig( + test_library="web_dev", + base=CategoryConfig( + weight=100, + subjects=[ + SubjectConfig( + subject_name="html", + weight=60, + tests=[TestConfig(file="index.html", name="has_tag")] 
+ ) + ] + ), + bonus=CategoryConfig( + weight=40, + subjects=[ + SubjectConfig( + subject_name="accessibility", + weight=20, + tests=[TestConfig(file="index.html", name="check_all_images_have_alt")] + ) + ] + ), + penalty=CategoryConfig( + weight=50, + subjects=[ + SubjectConfig( + subject_name="html", + weight=50, + tests=[TestConfig(file="index.html", name="check_bootstrap_usage")] + ) + ] + ) + ) + assert criteria.test_library == "web_dev" + assert criteria.base.weight == 100 + assert criteria.bonus.weight == 40 + assert criteria.penalty.weight == 50 + + +class TestSchemaIntegration: + """Integration tests with the actual criteria_schema.json file""" + + def test_parse_full_schema(self, criteria_config): + """Test that the full schema parses successfully""" + assert isinstance(criteria_config, CriteriaConfig) + assert criteria_config.test_library == "web_dev" + + def test_base_category_parsed(self, criteria_config): + """Test that base category is parsed correctly""" + assert criteria_config.base is not None + assert criteria_config.base.weight == 100 + assert len(criteria_config.base.subjects) == 2 + + def test_html_subject_structure(self, criteria_config): + """Test HTML subject structure""" + html_subject = criteria_config.base.subjects[0] + assert html_subject.subject_name == "html" + assert html_subject.weight == 60 + assert len(html_subject.subjects) == 2 # structure and link + + def test_html_structure_subject(self, criteria_config): + """Test HTML structure subject""" + html_subject = criteria_config.base.subjects[0] + structure_subject = html_subject.subjects[0] + assert structure_subject.subject_name == "structure" + assert structure_subject.weight == 40 + assert len(structure_subject.tests) == 12 + + def test_html_link_subject(self, criteria_config): + """Test HTML link subject""" + html_subject = criteria_config.base.subjects[0] + link_subject = html_subject.subjects[1] + assert link_subject.subject_name == "link" + assert link_subject.weight == 20 + assert len(link_subject.tests) == 2 + + def test_css_subject_structure(self, criteria_config): + """Test CSS subject structure""" + css_subject = criteria_config.base.subjects[1] + assert css_subject.subject_name == "css" + assert css_subject.weight == 40 + assert len(css_subject.subjects) == 2 # responsivity and style + + def test_css_responsivity_subject(self, criteria_config): + """Test CSS responsivity subject""" + css_subject = criteria_config.base.subjects[1] + responsivity_subject = css_subject.subjects[0] + assert responsivity_subject.subject_name == "responsivity" + assert responsivity_subject.weight == 50 + assert len(responsivity_subject.tests) == 3 + + def test_css_style_subject(self, criteria_config): + """Test CSS style subject""" + css_subject = criteria_config.base.subjects[1] + style_subject = css_subject.subjects[1] + assert style_subject.subject_name == "style" + assert style_subject.weight == 50 + assert len(style_subject.tests) == 7 + + def test_bonus_category_parsed(self, criteria_config): + """Test that bonus category is parsed correctly""" + assert criteria_config.bonus is not None + assert criteria_config.bonus.weight == 40 + assert len(criteria_config.bonus.subjects) == 2 + + def test_accessibility_bonus_subject(self, criteria_config): + """Test accessibility bonus subject""" + accessibility_subject = criteria_config.bonus.subjects[0] + assert accessibility_subject.subject_name == "accessibility" + assert accessibility_subject.weight == 20 + assert len(accessibility_subject.tests) == 1 + + def 
test_head_detail_bonus_subject(self, criteria_config): + """Test head_detail bonus subject""" + head_detail_subject = criteria_config.bonus.subjects[1] + assert head_detail_subject.subject_name == "head_detail" + assert head_detail_subject.weight == 80 + assert len(head_detail_subject.tests) == 7 + + def test_penalty_category_parsed(self, criteria_config): + """Test that penalty category is parsed correctly""" + assert criteria_config.penalty is not None + assert criteria_config.penalty.weight == 50 + assert len(criteria_config.penalty.subjects) == 2 + + def test_html_penalty_subject(self, criteria_config): + """Test HTML penalty subject""" + html_penalty_subject = criteria_config.penalty.subjects[0] + assert html_penalty_subject.subject_name == "html" + assert html_penalty_subject.weight == 50 + assert len(html_penalty_subject.tests) == 6 + + def test_project_structure_penalty_subject(self, criteria_config): + """Test project_structure penalty subject""" + project_structure_subject = criteria_config.penalty.subjects[1] + assert project_structure_subject.subject_name == "project_structure" + assert project_structure_subject.weight == 50 + assert len(project_structure_subject.tests) == 3 + + def test_test_config_structure(self, criteria_config): + """Test that test configs are structured correctly""" + # Get first test from structure subject + html_subject = criteria_config.base.subjects[0] + structure_subject = html_subject.subjects[0] + first_test = structure_subject.tests[0] + + assert first_test.file == "index.html" + assert first_test.name == "has_tag" + assert len(first_test.parameters) == 2 + assert first_test.parameters[0].name == "tag" + assert first_test.parameters[0].value == "body" + assert first_test.parameters[1].name == "required_count" + assert first_test.parameters[1].value == 1 + + def test_test_without_parameters(self, criteria_config): + """Test parsing of tests without parameters""" + # Get check_css_linked test from link subject + html_subject = criteria_config.base.subjects[0] + link_subject = html_subject.subjects[1] + check_css_test = link_subject.tests[0] + + assert check_css_test.file == "index.html" + assert check_css_test.name == "check_css_linked" + assert check_css_test.parameters is None or len(check_css_test.parameters) == 0 + + def test_parameter_value_types(self, criteria_config): + """Test that parameter values maintain correct types""" + html_subject = criteria_config.base.subjects[0] + structure_subject = html_subject.subjects[0] + + # Check string value + tag_param = structure_subject.tests[0].parameters[0] + assert isinstance(tag_param.value, str) + + # Check integer value + count_param = structure_subject.tests[0].parameters[1] + assert isinstance(count_param.value, int) + + def test_from_json_method(self, criteria_schema_path): + """Test parsing from JSON string""" + with open(criteria_schema_path, 'r') as f: + json_str = f.read() + + criteria = CriteriaConfig.from_json(json_str) + assert isinstance(criteria, CriteriaConfig) + assert criteria.test_library == "web_dev" + + def test_from_dict_method(self, criteria_schema_dict): + """Test parsing from dictionary""" + criteria = CriteriaConfig.from_dict(criteria_schema_dict) + assert isinstance(criteria, CriteriaConfig) + assert criteria.test_library == "web_dev" + + def test_round_trip_serialization(self, criteria_config): + """Test that we can serialize and deserialize the config""" + # Convert to dict + config_dict = criteria_config.model_dump() + + # Parse back from dict + reparsed = 
CriteriaConfig.from_dict(config_dict) + + # Verify they match + assert reparsed.test_library == criteria_config.test_library + assert reparsed.base.weight == criteria_config.base.weight + assert len(reparsed.base.subjects) == len(criteria_config.base.subjects) + + def test_weight_validation(self): + """Test that weight validation works""" + with pytest.raises(ValueError): + SubjectConfig( + subject_name="invalid", + weight=150, # Over 100 + tests=[TestConfig(file="test.html", name="test")] + ) + + with pytest.raises(ValueError): + SubjectConfig( + subject_name="invalid", + weight=-10, # Negative + tests=[TestConfig(file="test.html", name="test")] + ) + + def test_extra_fields_forbidden(self): + """Test that extra fields are forbidden at all levels""" + # Test at criteria level + with pytest.raises(ValueError): + CriteriaConfig( + test_library="web_dev", + base=CategoryConfig( + weight=100, + subjects=[ + SubjectConfig( + subject_name="html", + weight=60, + tests=[TestConfig(file="index.html", name="has_tag")] + ) + ] + ), + extra_field="not allowed" + ) + + +class TestWeightCalculations: + """Test weight-related calculations and validations""" + + def test_subject_weights_sum(self, criteria_config): + """Verify that subject weights in base category sum correctly""" + base_subjects = criteria_config.base.subjects + total_weight = sum(subject.weight for subject in base_subjects) + assert total_weight == 100 # html=60 + css=40 + + def test_nested_subject_weights_sum(self, criteria_config): + """Verify that nested subject weights sum correctly""" + html_subject = criteria_config.base.subjects[0] + nested_subjects = html_subject.subjects + total_weight = sum(subject.weight for subject in nested_subjects) + assert total_weight == 60 # structure=40 + link=20 + + css_subject = criteria_config.base.subjects[1] + nested_subjects = css_subject.subjects + total_weight = sum(subject.weight for subject in nested_subjects) + assert total_weight == 100 # responsivity=50 + style=50 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) + diff --git a/tests/unit/test_facade.py b/tests/unit/test_facade.py deleted file mode 100644 index 5057222..0000000 --- a/tests/unit/test_facade.py +++ /dev/null @@ -1,236 +0,0 @@ -import unittest -from unittest.mock import patch, Mock - -from autograder.autograder_facade import Autograder -from connectors.models.autograder_request import AutograderRequest -from connectors.models.assignment_config import AssignmentConfig -from autograder.core.models.autograder_response import AutograderResponse -from autograder.core.models.result import Result - - -class TestAutograderFacade(unittest.TestCase): - - def setUp(self): - # Common test data - self.mock_submission = {"file.py": "print('hello')"} - self.mock_criteria = {"base": {"subjects": {"test": {"tests": ["passing_test"]}}}} - self.mock_feedback_prefs = {"general": {}} - - self.mock_assignment_config = AssignmentConfig( - criteria=self.mock_criteria, - feedback=self.mock_feedback_prefs, - setup={}, - template="web dev" - ) - - # A standard successful result from the Grader - self.mock_grader_result = Result( - final_score=85.0, - author="test_student", - submission_file=self.mock_submission, - base_results=[], bonus_results=[], penalty_results=[] - ) - - @patch('autograder.autograder_facade.CriteriaTree') - @patch('autograder.autograder_facade.TemplateLibrary') - @patch('autograder.autograder_facade.Grader') - @patch('autograder.autograder_facade.Reporter') - def test_grade_success_default_feedback(self, 
mock_reporter, mock_grader, mock_template_library, - mock_criteria_tree): - """A successful grading run that returns generated default feedback.""" - # Arrange - # Create a fake template object with the attributes the facade expects - fake_template = Mock() - fake_template.requires_pre_executed_tree = False - fake_template.template_name = "web dev" - fake_template.stop = Mock() - - mock_template_library.get_template.return_value = fake_template - - fake_tree = Mock() - fake_tree.print_pre_executed_tree = Mock() - mock_criteria_tree.build_non_executed_tree.return_value = fake_tree - - mock_grader.return_value.run.return_value = self.mock_grader_result - - fake_reporter = Mock() - fake_reporter.generate_feedback.return_value = "Great job!" - mock_reporter.create_default_reporter.return_value = fake_reporter - - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=self.mock_assignment_config, - student_name="test_student", - include_feedback=True, - feedback_mode="default" - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertIsInstance(response, AutograderResponse) - self.assertEqual(response.status, "Success") - self.assertEqual(response.final_score, 85.0) - self.assertEqual(response.feedback, "Great job!") - - mock_template_library.get_template.assert_called_once_with("web dev") - mock_criteria_tree.build_non_executed_tree.assert_called_once() - mock_grader.return_value.run.assert_called_once() - mock_reporter.create_default_reporter.assert_called_once() - - @patch('autograder.autograder_facade.TemplateLibrary') - def test_grade_failure_invalid_template(self, mock_template_library): - """If TemplateLibrary returns None, the facade should fail with an informative message.""" - # Arrange - mock_template_library.get_template.return_value = None - - invalid_config = AssignmentConfig( - criteria = self.mock_criteria, - feedback = self.mock_feedback_prefs, - setup = {}, - template="invalid template" - ) - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=invalid_config, - student_name="student" - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertEqual(response.status, "fail") - self.assertEqual(response.final_score, 0.0) - self.assertIn("Unsupported template: invalid template", response.feedback) - - @patch('autograder.autograder_facade.CriteriaTree') - @patch('autograder.autograder_facade.TemplateLibrary') - @patch('autograder.autograder_facade.Grader') - def test_grade_failure_during_grading(self, mock_grader, mock_template_library, mock_criteria_tree): - """If the Grader raises an exception the facade should return a failure response containing the error.""" - # Arrange - fake_template = Mock() - fake_template.requires_pre_executed_tree = False - fake_template.template_name = "web dev" - fake_template.stop = Mock() - mock_template_library.get_template.return_value = fake_template - - fake_tree = Mock() - fake_tree.print_pre_executed_tree = Mock() - mock_criteria_tree.build_non_executed_tree.return_value = fake_tree - - mock_grader.return_value.run.side_effect = Exception("Something went wrong in the grader") - - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=self.mock_assignment_config, - student_name="test_student" - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertEqual(response.status, "fail") - 
self.assertEqual(response.final_score, 0.0) - self.assertIn("Something went wrong in the grader", response.feedback) - - @patch('autograder.autograder_facade.CriteriaTree') - @patch('autograder.autograder_facade.TemplateLibrary') - @patch('autograder.autograder_facade.Grader') - def test_grade_failure_ai_missing_credentials(self, mock_grader, mock_template_library, mock_criteria_tree): - """AI feedback mode without the required keys should fail with an explanatory message.""" - # Arrange - fake_template = Mock() - fake_template.requires_pre_executed_tree = False - fake_template.template_name = "web dev" - fake_template.stop = Mock() - mock_template_library.get_template.return_value = fake_template - - fake_tree = Mock() - fake_tree.print_pre_executed_tree = Mock() - mock_criteria_tree.build_non_executed_tree.return_value = fake_tree - - mock_grader.return_value.run.return_value = self.mock_grader_result - - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=self.mock_assignment_config, - student_name="test_student", - include_feedback=True, - feedback_mode="ai", - openai_key=None # missing keys - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertEqual(response.status, "fail") - self.assertEqual(response.final_score, 0.0) - self.assertIn("OpenAI key, Redis URL, and Redis token are required", response.feedback) - - @patch('autograder.autograder_facade.PreFlight') - def test_preflight_failure_stops_processing(self, mock_preflight): - """If pre-flight returns impediments, grading should stop and return those messages.""" - # Arrange - mock_preflight.run.return_value = [{'message': 'setup failed due to X'}] - - config_with_setup = AssignmentConfig( - criteria=self.mock_criteria, - feedback=self.mock_feedback_prefs, - setup={'cmds': []}, - template="web dev" - ) - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=config_with_setup, - student_name="student" - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertEqual(response.status, "fail") - self.assertEqual(response.final_score, 0.0) - self.assertIn('setup failed due to X', response.feedback) - - @patch('autograder.autograder_facade.CriteriaTree') - @patch('autograder.autograder_facade.TemplateLibrary') - @patch('autograder.autograder_facade.Grader') - def test_no_feedback_requested_returns_score_only(self, mock_grader, mock_template_library, mock_criteria_tree): - """When include_feedback is False, the facade should return the score and an empty feedback string.""" - # Arrange - fake_template = Mock() - fake_template.requires_pre_executed_tree = False - fake_template.template_name = "web dev" - fake_template.stop = Mock() - mock_template_library.get_template.return_value = fake_template - - fake_tree = Mock() - fake_tree.print_pre_executed_tree = Mock() - mock_criteria_tree.build_non_executed_tree.return_value = fake_tree - - mock_grader.return_value.run.return_value = self.mock_grader_result - - autograder_request = AutograderRequest( - submission_files=self.mock_submission, - assignment_config=self.mock_assignment_config, - student_name="test_student", - include_feedback=False - ) - - # Act - response = Autograder.grade(autograder_request) - - # Assert - self.assertEqual(response.status, "Success") - self.assertEqual(response.final_score, 85.0) - self.assertEqual(response.feedback, "") - - -if __name__ == '__main__': - unittest.main() diff --git 
a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py new file mode 100644 index 0000000..ac7e931 --- /dev/null +++ b/tests/unit/test_pipeline_steps.py @@ -0,0 +1,284 @@ +""" +Unit tests for BuildTreeStep and GradeStep. + +These tests verify: +1. BuildTreeStep correctly builds a CriteriaTree from config +2. GradeStep intelligently handles both CriteriaTree and Template inputs +3. Single vs multi-submission pipeline modes work correctly +""" + +import sys +from pathlib import Path +from typing import List + +from autograder.models.dataclass.param_description import ParamDescription +from autograder.pipeline import AutograderPipeline + +# Add project root to path +project_root = Path(__file__).parent.parent.parent +sys.path.insert(0, str(project_root)) + +from autograder.steps.build_tree_step import BuildTreeStep +from autograder.steps.grade_step import GradeStep +from autograder.models.dataclass.step_result import StepStatus +from autograder.models.abstract.template import Template +from autograder.models.abstract.test_function import TestFunction +from autograder.models.dataclass.test_result import TestResult + + +# Mock Template and TestFunction for testing +class MockTestFunction(TestFunction): + """Mock test function that always passes.""" + + def __init__(self, test_name): + self._test_name = test_name + + @property + def name(self): + return self._test_name + + @property + def description(self): + return f"Mock test function: {self._test_name}" + + @property + def parameter_description(self) -> List[ParamDescription]: + return [] + + def execute(self, *args, **kwargs): + """Always return a passing result.""" + return TestResult( + test_name=self._test_name, + score=1000, + report="Test passed", + parameters=None + ) + + +class MockTemplate(Template): + """Mock template with pre-defined test functions.""" + + def __init__(self): + self.name = "mock_template" + self._tests = { + "expect_output": MockTestFunction("expect_output"), + "check_file": MockTestFunction("check_file"), + "validate_input": MockTestFunction("validate_input"), + } + + @property + def template_name(self): + """Get template name.""" + return "mock_template" + + @property + def template_description(self): + """Get template description.""" + return "Mock template for testing purposes" + + @property + def requires_pre_executed_tree(self) -> bool: + """Mock templates don't require pre-executed trees.""" + return False + + @property + def requires_execution_helper(self) -> bool: + """Mock templates don't require execution helpers.""" + return False + + @property + def execution_helper(self): + """No execution helper needed for mocks.""" + return None + + def stop(self): + """No cleanup needed for mock templates.""" + pass + + def get_test(self, test_name: str): + """Get a test function by name.""" + return self._tests.get(test_name) + + def get_available_tests(self): + """Get list of available test names.""" + return list(self._tests.keys()) + + +def create_simple_criteria(): + """Create a simple criteria configuration for testing.""" + return { + "test_library": "input_output", + "base": { + "weight": 100, + "subjects": [ + { + "subject_name": "Basic Tests", + "weight": 100, + "tests": [ + { + "name": "expect_output", + "file": "main.py", + "parameters": [ + {"name": "stdin_input", "value": ["hello"]}, + {"name": "expected_output", "value": "hello"}, + ], + }, + { + "name": "expect_output", + "file": "main.py", + "parameters": [ + {"name": "stdin_input", "value": ["world"]}, + {"name": 
"expected_output", "value": "world"}, + ], + }, + ], + } + ], + }, + "bonus": { + "weight": 10, + "tests": [ + { + "name": "expect_output", + "file": "main.py", + "parameters": [ + {"name": "stdin_input", "value": ["bonus"]}, + {"name": "expected_output", "value": "bonus"}, + ], + } + ], + }, + } + + +def create_mock_submission(): + """Create mock submission files.""" + return {"main.py": "# Simple echo program\nprint(input())"} + + +def test_build_tree_step(): + """Test that BuildTreeStep correctly builds a CriteriaTree.""" + print("\n" + "=" * 80) + print("TEST: BuildTreeStep") + print("=" * 80) + + # Create criteria and template + criteria = create_simple_criteria() + template = MockTemplate() + + # Create and execute step + build_step = BuildTreeStep(criteria) + result = build_step.execute(template) + + # Verify result + assert result.status == StepStatus.SUCCESS, f"Build step failed: {result.error}" + assert result.data is not None, "CriteriaTree is None" + + criteria_tree = result.data + + # Verify tree structure + assert criteria_tree.base is not None, "Base category missing" + assert criteria_tree.bonus is not None, "Bonus category missing" + + print("✓ BuildTreeStep successfully built CriteriaTree") + print(f" - Base category: {criteria_tree.base.name}") + print(f" - Bonus category: {criteria_tree.bonus.name}") + + # Print tree structure + print("\nCriteria Tree Structure:") + criteria_tree.print_tree() + + return criteria_tree + + +def test_grade_from_tree(): + """Test that GradeStep can grade from a CriteriaTree.""" + print("\n" + "=" * 80) + print("TEST: GradeStep with CriteriaTree (Multi-Submission Mode)") + print("=" * 80) + + # Build criteria tree first + criteria = create_simple_criteria() + template = MockTemplate() + build_step = BuildTreeStep(criteria) + build_result = build_step.execute(template) + + criteria_tree = build_result.data + submission_files = create_mock_submission() + + # Create and execute grade step + grade_step = GradeStep( + submission_files=submission_files) + + result = grade_step.execute(criteria_tree) + + # Verify result + assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}" + assert result.data is not None, "GradingResult is None" + + grading_result = result.data + + print("✓ GradeStep successfully graded from CriteriaTree") + print(f" - Final Score: {grading_result.final_score}") + print(f" - Status: {grading_result.status}") + + # Print result tree + if grading_result.result_tree: + print("\nResult Tree:") + grading_result.result_tree.print_tree() + + return grading_result + +def test_invalid_input_type(): + """Test that GradeStep rejects invalid input types.""" + print("\n" + "=" * 80) + print("TEST: GradeStep with Invalid Input Type") + print("=" * 80) + + submission_files = create_mock_submission() + + grade_step = GradeStep( + submission_files=submission_files) + + # Try to execute with invalid input (string) + result = grade_step.execute("invalid input") + + # Verify it fails gracefully + assert result.status == StepStatus.FAIL, "Should fail with invalid input" + assert result.error is not None, "Should have error message" + + print("✓ GradeStep correctly rejected invalid input") + print(f" - Error: {result.error}") + + +def test_build_tree_and_grade_pipeline(): + """Test full pipeline: BuildTreeStep followed by GradeStep.""" + print("\n" + "=" * 80) + print("TEST: Full Pipeline (BuildTreeStep + GradeStep)") + print("=" * 80) + + # Create criteria and template + criteria = create_simple_criteria() + template = 
MockTemplate() + submission_files = create_mock_submission() + + # Build tree + build_step = BuildTreeStep(criteria) + # Grade submission + grade_step = GradeStep( + submission_files=submission_files) + + pipeline = AutograderPipeline() + pipeline.add_step(build_step) + pipeline.add_step(grade_step) + grading_result = pipeline.run(input_data=template) + + # Verify result + assert grading_result.status == "success", f"Pipeline failed: {grading_result.error}" + print("✓ Full pipeline successfully built tree and graded submission") + print(f" - Final Score: {grading_result.final_score}") + + # Print result tree + if grading_result.result_tree: + print("\nResult Tree:") + grading_result.result_tree.print_tree() \ No newline at end of file
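
The pipeline tests above rely on a simple chaining contract: each step's execute() returns a result object exposing a status and a data payload, and AutograderPipeline appears to feed one step's data into the next, so BuildTreeStep's CriteriaTree becomes GradeStep's input. The toy sketch below illustrates only that threading pattern; it is an assumption about the mechanism, and ToyPipeline / ToyStepResult are illustrative names, not the project's real classes.

# Toy sketch of the step-chaining pattern the pipeline tests exercise: each step
# returns a result with (status, data) and the runner threads `data` into the next
# step. This is an assumed mechanism, not AutograderPipeline's actual implementation.
from dataclasses import dataclass
from typing import Any, List, Optional

@dataclass
class ToyStepResult:
    status: str                 # "success" or "fail"
    data: Any = None
    error: Optional[str] = None

class ToyPipeline:
    def __init__(self):
        self._steps: List[Any] = []

    def add_step(self, step):
        self._steps.append(step)

    def run(self, input_data):
        result = ToyStepResult(status="success", data=input_data)
        for step in self._steps:
            result = step.execute(result.data)   # previous step's data feeds the next step
            if result.status != "success":
                return result                    # stop on the first failing step
        return result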
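
The inline comments in the deleted test_grader.py spell out the score arithmetic the grader is expected to reproduce: a subject's score is the weighted average of its children (weights expressed out of 100), a bonus category adds bonus_score / 100 * bonus_weight points, and a failed penalty test subtracts the incurred fraction of the penalty weight. A minimal sketch of that arithmetic follows; the helper names are illustrative, and plain numbers stand in for the real Result and Criteria objects.

# Minimal sketch of the final-score arithmetic described in the deleted
# test_grader.py comments; not the Grader's actual API.

def weighted_average(scores_and_weights):
    """Weighted average of child scores, with weights expressed out of 100."""
    return sum(score * weight / 100 for score, weight in scores_and_weights)

def final_score(base, bonus_score=None, bonus_weight=0,
                penalty_score=None, penalty_weight=0):
    """Combine the base score with bonus points earned and penalty points incurred."""
    score = base
    if bonus_score is not None:
        score += bonus_score / 100 * bonus_weight               # e.g. (100 / 100) * 20 = +20
    if penalty_score is not None:
        score -= (100 - penalty_score) / 100 * penalty_weight   # e.g. failed penalty: (100 / 100) * 30 = -30
    return score

# Reproduces the expectations encoded in the deleted tests:
assert final_score(base=50, bonus_score=100, bonus_weight=20) == 70
assert final_score(base=100, penalty_score=0, penalty_weight=30) == 70
frontend = weighted_average([(100, 50), (0, 50)])           # (100 * 0.5) + (0 * 0.5) = 50
base_total = weighted_average([(frontend, 80), (50, 20)])   # (50 * 0.8) + (50 * 0.2) = 50
assert base_total == 50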
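
The deleted test_tree.py likewise pins down how sibling subject weights are rebalanced: siblings that already sum to 100 are left alone, while any other total is rescaled proportionally (10 and 10 become 50 and 50; 10 and 30 become 25 and 75). A minimal sketch of that rescaling rule, assuming plain numeric weights rather than the real Subject model and using an illustrative helper name:

# Minimal sketch of the sibling-weight rescaling the deleted test_tree.py expects;
# `normalize_sibling_weights` is illustrative, not part of the CriteriaTree API.

def normalize_sibling_weights(weights):
    """Rescale sibling weights so they sum to 100, preserving their ratios."""
    total = sum(weights)
    if total in (0, 100):
        return list(weights)                      # already balanced, leave untouched
    return [weight * 100 / total for weight in weights]

# Matches test_weight_balancing and test_complex_weight_balancing:
assert normalize_sibling_weights([60, 40]) == [60, 40]        # already sums to 100
assert normalize_sibling_weights([10, 10]) == [50.0, 50.0]    # bonus subjects rescaled
assert normalize_sibling_weights([10, 30]) == [25.0, 75.0]    # backend subjects rescaled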