From 92cbc957634a0b9bf0512aacb5244129ac84cdf4 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Tue, 23 Dec 2025 09:59:48 -0300
Subject: [PATCH 01/49] feat: first glance of pipeline architecture

---
 autograder/autograder.py                      |  35 ++
 autograder/autograder_facade.py               | 415 ------------------
 autograder/context.py                         |  32 --
 autograder/core/report/fatal_report.py        | 116 -----
 autograder/core/report/reporter_factory.py    |  16 -
 .../config_schemas/ai_feedback_schema.json    |   0
 .../config_schemas/criteria_schema.json       | 196 ---------
 .../config_schemas/feedback_schema.json       |   0
 autograder/core/utils/__init__.py             |   0
 autograder/core/utils/result_processor.py     |  31 --
 autograder/{builder => models}/__init__.py    |   0
 .../abstract}/__init__.py                     |   0
 autograder/models/abstract/step.py            |   9 +
 .../models => models/abstract}/template.py    |   0
 .../abstract}/test_function.py                |   0
 .../{builder => }/models/criteria_tree.py     |   5 +-
 .../models => models/dataclass}/__init__.py   |   0
 .../dataclass}/autograder_response.py         |  13 +-
 .../dataclass}/feedback_preferences.py        |  46 +-
 autograder/models/dataclass/grading_result.py |  13 +
 .../dataclass}/param_description.py           |   0
 .../models => models/dataclass}/result.py     |  17 +-
 autograder/models/dataclass/step_result.py    |  14 +
 .../dataclass}/test_result.py                 |   7 +-
 autograder/pipeline.py                        |  36 ++
 .../template_library => services}/__init__.py |   0
 .../criteria_tree_service.py}                 |   2 +-
 .../grader.py => services/grader_service.py}  |  11 +-
 .../pre_flight_service.py}                    |   2 +-
 .../templates => services/report}/__init__.py |   0
 .../{core => services}/report/ai_reporter.py  |   6 +-
 .../report/base_reporter.py                   |   0
 .../report/default_reporter.py                |   4 +-
 .../services/report/reporter_factory.py       |  27 ++
 .../template_library_service.py}              |  14 +-
 .../utils => services}/upstash_driver.py      |   2 +-
 autograder/{core => steps}/__init__.py        |   0
 autograder/steps/build_tree_step.py           |  13 +
 autograder/steps/export_step.py               |   9 +
 autograder/steps/feedback_step.py             |  15 +
 autograder/steps/grade_step.py                |  16 +
 autograder/steps/load_template_step.py        |  12 +
 autograder/steps/pre_flight_step.py           |  10 +
 .../grading => template_library}/__init__.py  |   0
 .../api_testing.py                            |   6 +-
 .../essay_grader.py                           |   4 +-
 .../input_output.py                           |   9 +-
 .../templates => template_library}/web_dev.py |   2 +-
 autograder/{core/models => utils}/__init__.py |   0
 .../report => utils/executors}/__init__.py    |   0
 .../executors/ai_executor.py}                 |   4 +-
 .../executors}/sandbox_executor.py            |   0
 .../{core => }/utils/secrets_fetcher.py       |   0
 connectors/adapters/__init__.py               |   0
 connectors/adapters/api/__init__.py           |   0
 connectors/adapters/api/api_adapter.py        | 124 ------
 .../github_action_adapter/__init__.py         |   0
 .../github_action_adapter/github_adapter.py   | 239 ----------
 .../api_entrypoint.py => api_connector.py}    |   2 +-
 ...thub_entrypoint.py => github_connector.py} |   2 +-
 connectors/models/__init__.py                 |   0
 connectors/models/assignment_config.py        |  37 --
 connectors/models/autograder_request.py       |  39 --
 connectors/port.py                            |  46 --
 docs/system/creating_assignments.md           |  68 +--
 docs/system/execution/execution_helpers.md    |   4 +-
 tests/data/custom_template/custom_template.py |   2 +-
 tests/data/web_dev/criteria.json              |   3 +-
 tests/unit/builder/test_tree.py               |   2 +-
 .../core/reporter/test_default_reporter.py    |   8 +-
 tests/unit/core/test_grader.py                |   9 +-
 tests/unit/test_facade.py                     |   4 +-
 72 files changed, 339 insertions(+), 1419 deletions(-)
 create mode 100644 autograder/autograder.py
 delete mode 100644 autograder/autograder_facade.py
 delete mode 100644 autograder/context.py
 delete mode 100644 autograder/core/report/fatal_report.py
 delete mode 100644 autograder/core/report/reporter_factory.py
 delete mode 100644 autograder/core/schemas/config_schemas/ai_feedback_schema.json
 delete mode 100644 autograder/core/schemas/config_schemas/criteria_schema.json
 delete mode 100644 autograder/core/schemas/config_schemas/feedback_schema.json
 delete mode 100644 autograder/core/utils/__init__.py
 delete mode 100644 autograder/core/utils/result_processor.py
 rename autograder/{builder => models}/__init__.py (100%)
 rename autograder/{builder/execution_helpers => models/abstract}/__init__.py (100%)
 create mode 100644 autograder/models/abstract/step.py
 rename autograder/{builder/models => models/abstract}/template.py (100%)
 rename autograder/{builder/models => models/abstract}/test_function.py (100%)
 rename autograder/{builder => }/models/criteria_tree.py (98%)
 rename autograder/{builder/models => models/dataclass}/__init__.py (100%)
 rename autograder/{core/models => models/dataclass}/autograder_response.py (54%)
 rename autograder/{core/models => models/dataclass}/feedback_preferences.py (86%)
 create mode 100644 autograder/models/dataclass/grading_result.py
 rename autograder/{builder/models => models/dataclass}/param_description.py (100%)
 rename autograder/{core/models => models/dataclass}/result.py (73%)
 create mode 100644 autograder/models/dataclass/step_result.py
 rename autograder/{core/models => models/dataclass}/test_result.py (83%)
 create mode 100644 autograder/pipeline.py
 rename autograder/{builder/template_library => services}/__init__.py (100%)
 rename autograder/{builder/tree_builder.py => services/criteria_tree_service.py} (99%)
 rename autograder/{core/grading/grader.py => services/grader_service.py} (97%)
 rename autograder/{builder/pre_flight.py => services/pre_flight_service.py} (98%)
 rename autograder/{builder/template_library/templates => services/report}/__init__.py (100%)
 rename autograder/{core => services}/report/ai_reporter.py (98%)
 rename autograder/{core => services}/report/base_reporter.py (100%)
 rename autograder/{core => services}/report/default_reporter.py (97%)
 create mode 100644 autograder/services/report/reporter_factory.py
 rename autograder/{builder/template_library/library.py => services/template_library_service.py} (88%)
 rename autograder/{core/utils => services}/upstash_driver.py (98%)
 rename autograder/{core => steps}/__init__.py (100%)
 create mode 100644 autograder/steps/build_tree_step.py
 create mode 100644 autograder/steps/export_step.py
 create mode 100644 autograder/steps/feedback_step.py
 create mode 100644 autograder/steps/grade_step.py
 create mode 100644 autograder/steps/load_template_step.py
 create mode 100644 autograder/steps/pre_flight_step.py
 rename autograder/{core/grading => template_library}/__init__.py (100%)
 rename autograder/{builder/template_library/templates => template_library}/api_testing.py (98%)
 rename autograder/{builder/template_library/templates => template_library}/essay_grader.py (98%)
 rename autograder/{builder/template_library/templates => template_library}/input_output.py (97%)
 rename autograder/{builder/template_library/templates => template_library}/web_dev.py (99%)
 rename autograder/{core/models => utils}/__init__.py (100%)
 rename autograder/{core/report => utils/executors}/__init__.py (100%)
 rename autograder/{builder/execution_helpers/AI_Executor.py => utils/executors/ai_executor.py} (99%)
 rename autograder/{builder/execution_helpers => utils/executors}/sandbox_executor.py (100%)
 rename autograder/{core => }/utils/secrets_fetcher.py (100%)
 delete mode 100644 connectors/adapters/__init__.py
 delete mode 100644 connectors/adapters/api/__init__.py
 delete mode 100644 connectors/adapters/api/api_adapter.py
 delete mode 100644 connectors/adapters/github_action_adapter/__init__.py
 delete mode 100644 connectors/adapters/github_action_adapter/github_adapter.py
 rename connectors/{adapters/api/api_entrypoint.py => api_connector.py} (99%)
 rename connectors/{adapters/github_action_adapter/github_entrypoint.py => github_connector.py} (97%)
 delete mode 100644 connectors/models/__init__.py
 delete mode 100644 connectors/models/assignment_config.py
 delete mode 100644 connectors/models/autograder_request.py
 delete mode 100644 connectors/port.py

diff --git a/autograder/autograder.py b/autograder/autograder.py
new file mode 100644
index 0000000..ef92735
--- /dev/null
+++ b/autograder/autograder.py
@@ -0,0 +1,35 @@
+from autograder.services.report.reporter_factory import ReporterFactory
+from autograder.services.upstash_driver import UpstashDriver
+from autograder.pipeline import AutograderPipeline
+from autograder.steps.export_step import ExporterStep
+from autograder.steps.feedback_step import FeedbackStep
+from autograder.steps.grade_step import GradeStep
+from autograder.steps.load_template_step import TemplateLoaderStep
+from autograder.steps.pre_flight_step import PreFlightStep
+from autograder.steps.build_tree_step import BuildTreeStep
+
+
+def build_pipeline(
+                 template_name,
+                 include_feedback,
+                 grading_criteria,
+                 feedback_config,
+                 setup_config = None,
+                 custom_template = None,
+                 feedback_mode = None):
+
+    pipeline = AutograderPipeline()
+    if setup_config:
+        pipeline.add_step(PreFlightStep(setup_config))
+    pipeline.add_step(TemplateLoaderStep(template_name,custom_template))
+    pipeline.add_step(BuildTreeStep(grading_criteria))
+    pipeline.add_step(GradeStep())
+    if include_feedback:
+        reporter_service = ReporterFactory.create_reporter_for(feedback_mode)
+        pipeline.add_step(FeedbackStep(reporter_service,feedback_config))
+    pipeline.add_step(ExporterStep(UpstashDriver)) # Placeholder for remote driver
+    return pipeline
+
+
+
+
diff --git a/autograder/autograder_facade.py b/autograder/autograder_facade.py
deleted file mode 100644
index 5a9f666..0000000
--- a/autograder/autograder_facade.py
+++ /dev/null
@@ -1,415 +0,0 @@
-import logging
-
-from autograder.builder.models.template import Template
-from autograder.context import request_context
-from autograder.core.grading.grader import Grader
-from autograder.core.models.autograder_response import AutograderResponse
-from autograder.core.models.feedback_preferences import FeedbackPreferences
-from autograder.core.models.result import Result
-from autograder.core.report.reporter_factory import Reporter
-from autograder.core.utils.upstash_driver import Driver
-from connectors.models.assignment_config import AssignmentConfig
-from connectors.models.autograder_request import AutograderRequest
-from autograder.builder.tree_builder import CriteriaTree
-from autograder.builder.template_library.library import TemplateLibrary
-
-
-from autograder.builder.pre_flight import PreFlight
-
-logger = logging.getLogger(__name__)
-
-class Autograder:
-
-    # Static member that's accessible by all methods
-    selected_template : Template = None
-    feedback_preferences: FeedbackPreferences = None
-
-    @staticmethod
-    def grade(autograder_request: AutograderRequest):
-        logger.info("Starting autograder process")
-
-        # Set the request in the global context at the beginning of the process
-        request_context.set_request(autograder_request)
-        if autograder_request.openai_key:
-            logger.info("OpenAI key provided, AI feedback mode may be used")
-            logger.info("Setting environment variable for OpenAI key")
-            import os
-            os.environ["OPENAI_API_KEY"] = autograder_request.openai_key
-        try:
-
-            # Step 1: Handle Pre-flight checks if setup is defined
-            if autograder_request.assignment_config.setup:
-                Autograder._pre_flight_step()
-
-            # Step 2: Get test template
-            logger.info("Importing test template")
-            Autograder._import_template_step()
-
-            # Step 3: Build criteria tree
-            logger.info("Building criteria tree from assignment configuration:")
-            Autograder._build_criteria_step()
-
-            # Step 4: Initialize and run grader
-            logger.info("Starting grading process")
-            result = Autograder._start_and_run_grader()
-            logger.info(f"Grading completed. Final score: {result.final_score}")
-            
-            if autograder_request.redis_token and autograder_request.redis_url:
-              Autograder.export_final_score(result.final_score)
-
-            if autograder_request.include_feedback:
-                # Step 5: Setup feedback preferences
-                logger.info("Processing feedback preferences")
-                Autograder._setup_feedback_pref()
-                logger.debug(f"Feedback mode: {autograder_request.feedback_mode}")
-
-                # Step 6: Create reporter based on feedback mode
-                Autograder.create_feedback_report(result)
-
-                # Step 7: Generate feedback
-                logger.info("Generating feedback report")
-                feedback_report = Autograder._generate_feedback()
-                logger.info("Feedback report generated successfully")
-
-
-                # Step 8: Create and return the successful response
-                logger.info("Creating successful autograder response")
-                response = AutograderResponse(
-                    status = "Success",
-                    final_score = result.final_score,
-                    feedback = feedback_report,
-                    test_report = result.get_test_report()
-                )
-                logger.info("Autograder process completed successfully")
-                return response
-            else:
-                logger.info("Feedback not requested, returning score only")
-                return AutograderResponse(
-                     status="Success",
-                     final_score=result.final_score,
-                     feedback="",
-                     test_report=result.get_test_report()
-                )
-
-
-        except Exception as e:
-            # Catch any exception, log it, and return a failure response
-            error_message = f"An unexpected error occurred during the grading process: {str(e)}"
-            logger.error(error_message)
-            logger.exception("Full exception traceback:")
-            return AutograderResponse(status="fail", final_score=0.0, feedback=error_message, test_report=[])
-
-    @staticmethod
-    def _pre_flight_step():
-
-         if request_context.get_request() and request_context.get_request().assignment_config.setup:
-                logger.info("Running pre-flight setup commands")
-                impediments = PreFlight.run()
-                if impediments:
-                     error_messages = [impediment['message'] for impediment in impediments]
-                     error_text = "\n".join(error_messages)
-                     logger.error(f"Pre-flight checks failed with errors: {error_messages}")
-                     raise RuntimeError(error_text)
-
-         logger.info("Pre-flight setup completed with no impediments")
-
-
-
-    @staticmethod
-    def _import_template_step():
-        req = request_context.get_request()
-        template_name = req.assignment_config.template
-        if template_name == "custom":
-            logger.info(f"Loading custom test template provided!")
-            test_template = TemplateLibrary.get_template(template_name,req.assignment_config.custom_template_str)
-        else:
-            logger.info(f"Loading test template: '{template_name}'")
-            test_template = TemplateLibrary.get_template(template_name)
-        if test_template is None:
-            logger.error(f"Template '{template_name}' not found in TemplateLibrary")
-            raise ValueError(f"Unsupported template: {template_name}")
-
-        logger.info(f"Test template '{test_template.template_name}' instantiated successfully")
-        Autograder.selected_template = test_template
-
-
-    @staticmethod
-    def _build_criteria_step():
-        req = request_context.get_request()
-        test_template = Autograder.selected_template
-
-        if test_template.requires_pre_executed_tree:
-            logger.info("Template requires pre-executed criteria tree.")
-            criteria_tree = CriteriaTree.build_pre_executed_tree(test_template)
-            criteria_tree.print_pre_executed_tree()
-        else:
-            logger.info("Template does not require pre-executed criteria tree.")
-            criteria_tree = CriteriaTree.build_non_executed_tree()
-
-        test_template.stop()
-        criteria_tree.print_pre_executed_tree()
-        logger.info("Criteria tree built successfully")
-
-        req.criteria_tree = criteria_tree
-        return criteria_tree
-
-    @staticmethod
-    def _start_and_run_grader():
-        req = request_context.get_request()
-        criteria_tree = req.criteria_tree
-        test_template = Autograder.selected_template
-
-
-        logger.info("Initializing grader with criteria tree and test template")
-        grader = Grader(criteria_tree, test_template)
-        logger.debug(f"Grader initialized for student: {req.student_name}")
-
-
-        logger.info(f"Running grading process")
-
-        result = grader.run()
-
-        return result
-
-    @staticmethod
-    def export_final_score(final_score):
-        req = request_context.get_request()
-        student_credentials = req.student_credentials
-        if req.redis_token and req.redis_url:
-            logger.info("Sending final score to Redis")
-            driver = Driver.create(req.redis_token, req.redis_url)
-            if driver is not None:
-                if driver.user_exists(student_credentials):
-                    driver.set_score(student_credentials, final_score)
-                else:
-                    driver.create_user(student_credentials)
-                    driver.set_score(student_credentials, final_score)
-                logger.info("Final score sent to Redis successfully")
-
-    @staticmethod
-    def _setup_feedback_pref():
-        feedback = FeedbackPreferences.from_dict()
-        Autograder.feedback_preferences = feedback
-
-    @staticmethod
-    def create_feedback_report(result: Result):
-
-        req = request_context.get_request()
-        template = Autograder.selected_template
-        feedback = Autograder.feedback_preferences
-        feedback_mode = req.feedback_mode
-
-
-        if feedback_mode == "default":
-            logger.info("Creating default reporter")
-            reporter = Reporter.create_default_reporter(result, feedback,template)
-            logger.info("Default reporter created successfully")
-
-        elif feedback_mode == "ai":
-            logger.info("Creating AI reporter")
-
-            if not all(
-                    [req.openai_key,req.redis_url, req.redis_token]):
-                error_msg = "OpenAI key, Redis URL, and Redis token are required for AI feedback mode."
-                logger.error(error_msg)
-                raise ValueError(error_msg)
-
-            logger.info("All AI requirements validated successfully")
-
-            # Setup Redis driver
-            driver = Driver.create(req.redis_token, req.redis_url)
-            student_credentials = req.student_credentials
-
-
-            if not driver.user_exists(student_credentials):
-                driver.create_user(student_credentials)
-
-            if driver.decrement_user_quota(student_credentials):
-                quota = driver.get_user_quota(student_credentials)
-                logger.info(f"Quota check passed. Remaining quota: {quota}")
-                reporter = Reporter.create_ai_reporter(result,feedback, template, quota)
-            else:
-                logger.warning("Quota exceeded for student, falling back to default feedback.")
-                reporter = Reporter.create_default_reporter(result, feedback,template)
-
-        else:
-                raise ValueError(f"Unsupported feedback mode: {feedback_mode}")
-
-        req.reporter = reporter
-        return reporter
-
-    @staticmethod
-    def _generate_feedback():
-        req = request_context.get_request()
-        reporter = req.reporter
-        feedback_report = reporter.generate_feedback()
-        req.feedback_report = feedback_report
-        return feedback_report
-
-
-
-if __name__ == "__main__":
-    if __name__ == "__main__":
-        logging.basicConfig(level=logging.INFO)
-
-        # 1. Define submission files for web dev
-        submission_files = {
-            "index.html": """
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <title>Test Page</title>
-        <link rel="stylesheet" href="style.css">
-    </head>
-    <body>
-        <header>
-            <h1>Welcome</h1>
-        </header>
-        <main>
-            <p>This is a paragraph.</p>
-            <img src="image.jpg" alt="A descriptive alt text">
-        </main>
-        <footer>
-            <p>&copy; 2025</p>
-        </footer>
-    </body>
-    </html>
-            """,
-            "style.css": """
-    body {
-        font-family: sans-serif;
-        margin: 20px;
-    }
-    header {
-        background-color: #f0f0f0;
-        padding: 1em;
-    }
-            """
-        }
-
-        # 2. Define criteria_json for web dev
-        criteria_json = {
-            "test_library": "web_dev",  # Match the template name
-            "base": {
-                "weight": 100,
-                "subjects": {
-                    "html_structure": {
-                        "weight": 70,
-                        "tests": [
-                            {
-                                "file": "index.html",
-                                "name": "has_tag",
-                                "calls": [
-                                    ["head", 1],
-                                    ["body", 1],
-                                    ["header", 1],
-                                    ["main", 1],
-                                    ["footer", 1]
-                                ]
-                            },
-                            {
-                                "file": "index.html",
-                                "name": "check_css_linked"
-                            }
-                        ]
-                    },
-                    "accessibility": {
-                        "weight": 30,
-                        "tests": [
-                            {
-                                "file": "index.html",
-                                "name": "check_all_images_have_alt"
-                            }
-                        ]
-                    }
-                }
-            },
-            "bonus": {
-                "weight": 20,  # Example bonus weight
-                "subjects": {
-                    "best_practices": {
-                        "weight": 100,
-                        "tests": [
-                            {
-                                "file": "index.html",
-                                "name": "uses_semantic_tags"
-                            }
-                        ]
-                    }
-                }
-            },
-            "penalty": {
-                "weight": 10,  # Example penalty weight
-                "subjects": {
-                    "bad_practices": {
-                        "weight": 100,
-                        "tests": [
-                            {
-                                "file": "index.html",
-                                "name": "check_no_inline_styles"
-                            }
-                        ]
-                    }
-                }
-            }
-        }
-
-        # 3. Define feedback_json (can be simple or complex)
-        feedback_json = {
-            "general": {
-                "report_title": "Web Dev Assignment Report",
-                "show_score": True
-            },
-            "default": {
-                "category_headers": {
-                    "base": "✅ Core HTML/CSS",
-                    "bonus": "⭐ Best Practices Bonus",
-                    "penalty": "🚨 Points Deducted"
-                }
-            }
-        }
-
-        # 4. Define setup_json with file checks
-        setup_json = {
-            "file_checks": [
-                "index.html",
-                "style.css"
-            ],
-            "commands": []  # No commands needed for static web dev
-        }
-
-        # 5. Create AssignmentConfig using the web dev template
-        config = AssignmentConfig(
-            criteria=criteria_json,
-            feedback=feedback_json,
-            setup=setup_json,
-            template="webdev"  # Use the web dev template
-        )
-
-        # 6. Create AutograderRequest
-        request = AutograderRequest(
-            submission_files=submission_files,
-            assignment_config=config,
-            student_name="Local Tester",
-            student_credentials="local_tester_01",  # Credentials for local testing
-            include_feedback=True,  # Request feedback
-            feedback_mode="default"  # Use default feedback for simplicity
-        )
-
-        # 7. Run the grading process
-        logger = logging.getLogger(__name__)
-        logger.info("--- Running Local Web Dev Test ---")
-        facade_response = Autograder.grade(request)
-
-        # 8. Print the results
-        logger.info("--- Grading Complete ---")
-        print(f"Status: {facade_response.status}")
-        print(f"Final Score: {facade_response.final_score}")
-        print("\n--- Feedback ---")
-        print(facade_response.feedback)
-        print("\n--- Test Report ---")
-        if facade_response.test_report:
-            for test in facade_response.test_report:
-                print(f"- {test.subject_name}: {test.test_name} -> Score: {test.score}, Report: {test.report}")
-        else:
-            print("No test report generated.")
diff --git a/autograder/context.py b/autograder/context.py
deleted file mode 100644
index 6838e12..0000000
--- a/autograder/context.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from connectors.models.autograder_request import AutograderRequest
-
-
-class RequestContext:
-    """
-    A Singleton class to hold the active AutograderRequest object.
-    This provides a global point of access to request data, avoiding the need
-    to pass the request object through multiple layers of the application.
-    """
-    _instance = None
-
-    @classmethod
-    def get_instance(cls):
-        """Gets the single instance of the class."""
-        if cls._instance is None:
-            cls._instance = cls.__new__(cls)
-            cls._instance.request = None
-        return cls._instance
-
-    def set_request(self, autograder_request: AutograderRequest | None):
-        """Sets the active autograder request for the current session."""
-        self.request = autograder_request
-        return self.request
-
-    def get_request(self):
-        """Gets the active autograder request."""
-        if self.request is None:
-            raise Exception("RequestContext has not been initialized. Call set_request() first.")
-        return self.request
-
-# Create a globally accessible instance
-request_context = RequestContext.get_instance()
diff --git a/autograder/core/report/fatal_report.py b/autograder/core/report/fatal_report.py
deleted file mode 100644
index caddeb4..0000000
--- a/autograder/core/report/fatal_report.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import json
-
-
-
-
-class FatalReporter:
-    """
-    This class is responsible for generating a report for fatal errors in the autograder.
-    It reads a JSON file containing error details and formats it into a
-    user-friendly markdown report.
-    """
-    # --- Project Directory Setup ---
-    # These paths are configured to locate necessary files within the project structure.
-    _THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__))
-    _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(_THIS_FILE_DIR)))
-    VALIDATION_DIR = os.path.join(_PROJECT_ROOT, "autograder",'validation')
-    REQUEST_BUCKET_DIR = os.path.join(_PROJECT_ROOT, 'request_bucket')
-    RESULTS_DIR = os.path.join(VALIDATION_DIR, 'tests', 'results')
-
-    @staticmethod
-    def generate_feedback(report_path=None):
-        """
-        Generates a markdown feedback report based on fatal error results from a JSON file.
-
-        This method reads a JSON file that details fatal errors encountered by the
-        autograder, formats them into a structured and readable markdown report,
-        and returns the report as a string.
-
-        Args:
-            report_path (str, optional): The full path to the JSON report file.
-                                         If None, it defaults to a file named
-                                         'fatal_errors.json' in the class's RESULTS_DIR.
-
-        Returns:
-            str: A string containing the formatted markdown report.
-        """
-        # If no specific path is provided, construct the default path
-        if report_path is None:
-            print(FatalReporter.RESULTS_DIR)
-            report_path = os.path.join(FatalReporter.RESULTS_DIR, 'fatal_report.json')
-
-        # --- Read and Validate Report File ---
-        try:
-            with open(report_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-        except FileNotFoundError:
-            return "## ❌ Error\nCould not find the fatal error report file. Please contact an administrator."
-        except json.JSONDecodeError:
-            return "## ❌ Error\nCould not parse the fatal error report file due to a syntax error. Please contact an administrator."
-
-        errors = data.get("errors", [])
-        if not errors:
-            return "## ✅ No Fatal Errors Found\nYour submission passed all initial checks."
-
-        # --- Group Errors for Structured Reporting ---
-        grouped_errors = {}
-        for error in errors:
-            error_type = error.get("type", "unknown_error")
-            if error_type not in grouped_errors:
-                grouped_errors[error_type] = []
-            grouped_errors[error_type].append(error.get("message", "No message provided."))
-
-        # --- Build the Markdown Report ---
-        markdown_report = ["# 🚨 Autograder Fatal Error Report\n"]
-        markdown_report.append(
-            "We're sorry, but the autograder could not run due to the following critical issues with your submission. Please fix them and resubmit.\n")
-
-        # Handle specific, common error types with custom formatting
-        if "file_check" in grouped_errors:
-            markdown_report.append("---")
-            markdown_report.append("## 📁 Missing Files")
-            markdown_report.append(
-                "The following required files were not found. Please ensure they are named correctly and are located in the root directory of your project.\n")
-            for msg in grouped_errors.pop("file_check"):
-                # Attempt to extract the filename for cleaner display
-                try:
-                    filename = msg.split("'")[1]
-                    markdown_report.append(f"- ` {filename} `")
-                except IndexError:
-                    markdown_report.append(f"- {msg}")
-            markdown_report.append("\n")
-
-        # Handle any other error types generically
-        for error_type, messages in grouped_errors.items():
-            markdown_report.append("---")
-            heading = error_type.replace('_', ' ').title()
-            markdown_report.append(f"## ❗ {heading}")
-            for msg in messages:
-                markdown_report.append(f"- {msg}")
-            markdown_report.append("\n")
-
-        markdown_report.append("---\n")
-        markdown_report.append(
-            "**Next Steps:** Please review the errors listed above, correct your project files accordingly, and submit your work again.")
-
-        return "\n".join(markdown_report)
-
-    @classmethod
-    def create(cls, result):
-        """
-        This class method would be responsible for creating the initial
-        fatal_errors.json file before generate_feedback is called.
-        (Implementation is beyond the scope of this example).
-        """
-        # Example:
-        # report_path = os.path.join(cls.RESULTS_DIR, 'fatal_errors.json')
-        # with open(report_path, 'w', encoding='utf-8') as f:
-        #     json.dump(result, f, indent=2)
-        pass
-
-if __name__ == "__main__":
-    # Example usage
-    report = FatalReporter.generate_feedback()
-    print(report)
-    # Note: In a real scenario, you would call FatalReporter.create(result) to create the initial report file.
diff --git a/autograder/core/report/reporter_factory.py b/autograder/core/report/reporter_factory.py
deleted file mode 100644
index 51ff60c..0000000
--- a/autograder/core/report/reporter_factory.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from autograder.core.models.feedback_preferences import FeedbackPreferences
-from autograder.core.models.result import Result
-from autograder.core.report.ai_reporter import AIReporter
-from autograder.core.report.default_reporter import DefaultReporter
-class Reporter:
-    @classmethod
-    def create_ai_reporter(cls, result: Result, feedback: FeedbackPreferences,template, quota):
-        """Creates an AIReporter instance with the students results"""
-        return AIReporter.create(result,feedback,template,quota)
-
-    @classmethod
-    def create_default_reporter(cls, result: Result,feedback: FeedbackPreferences,template):
-        """Creates a DefaultReporter instance with the students results"""
-        return DefaultReporter.create(result,feedback,template)
-
-
diff --git a/autograder/core/schemas/config_schemas/ai_feedback_schema.json b/autograder/core/schemas/config_schemas/ai_feedback_schema.json
deleted file mode 100644
index e69de29..0000000
diff --git a/autograder/core/schemas/config_schemas/criteria_schema.json b/autograder/core/schemas/config_schemas/criteria_schema.json
deleted file mode 100644
index 761e6e3..0000000
--- a/autograder/core/schemas/config_schemas/criteria_schema.json
+++ /dev/null
@@ -1,196 +0,0 @@
-{
-  "test_library": "web_dev",
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "html": {
-        "weight": 60,
-        "subjects": {
-          "structure": {
-            "weight": 40,
-            "tests": [
-              {
-                "file": "index.html",
-                "name": "has_tag",
-                "calls": [
-                  ["body", 1],
-                  ["header", 1],
-                  ["nav", 1],
-                  ["main", 1],
-                  ["article", 4],
-                  ["img", 5],
-                  ["footer", 1],
-                  ["div", 1],
-                  ["form", 1],
-                  ["input", 1],
-                  ["button", 1]
-                ]
-              },
-              {
-                "file": "index.html",
-                "name": "has_attribute",
-                "calls": [
-                  ["class", 2]
-                ]
-              }
-            ]
-          },
-          "link": {
-            "weight": 20,
-            "tests": [
-              {
-                "file": "index.html",
-                "name": "check_css_linked"
-              },
-              {
-                "file": "index.html",
-                "name": "check_internal_links_to_articles",
-                "calls": [
-                  [4]
-                ]
-              }
-            ]
-          }
-        }
-      },
-      "css": {
-        "weight": 40,
-        "subjects": {
-          "responsivity": {
-            "weight": 50,
-            "tests": [
-              {
-                "file": "css/styles.css",
-                "name": "uses_relative_units"
-              },
-              {
-                "file": "css/styles.css",
-                "name": "check_media_queries"
-              },
-              {
-                "file": "css/styles.css",
-                "name": "check_flexbox_usage"
-              }
-            ]
-          },
-          "style": {
-            "weight": 50,
-            "tests": [
-              {
-                "file": "css/styles.css",
-                "name": "has_style",
-                "calls": [
-                  ["font-size", 1],
-                  ["font-family", 1],
-                  ["text-align", 1],
-                  ["display", 1],
-                  ["position", 1],
-                  ["margin", 1],
-                  ["padding", 1]
-                ]
-              }
-            ]
-          }
-        }
-      }
-    }
-  },
-  "bonus": {
-    "weight": 40,
-    "subjects": {
-      "accessibility": {
-        "weight": 20,
-        "tests": [
-          {
-            "file": "index.html",
-            "name": "check_all_images_have_alt"
-          }
-        ]
-      },
-      "head_detail": {
-        "weight": 80,
-        "tests": [
-          {
-            "file": "index.html",
-            "name": "check_head_details",
-            "calls": [
-              ["title"],
-              ["meta"]
-            ]
-          },
-          {
-            "file": "index.html",
-            "name": "check_attribute_and_value",
-            "calls": [
-              ["meta", "charset", "UTF-8"],
-              ["meta", "name", "viewport"],
-              ["meta", "name", "description"],
-              ["meta", "name", "author"],
-              ["meta", "name", "keywords"]
-            ]
-          }
-        ]
-      }
-    }
-  },
-  "penalty": {
-    "weight": 50,
-    "subjects": {
-      "html": {
-        "weight": 50,
-        "tests": [
-          {
-            "file": "index.html",
-            "name": "check_bootstrap_usage"
-          },
-          {
-            "file": "css/styles.css",
-            "name": "check_id_selector_over_usage",
-            "calls": [
-              [2]
-            ]
-          },
-          {
-            "file": "index.html",
-            "name": "has_forbidden_tag",
-            "calls": [
-              ["script"]
-            ]
-          },
-          {
-            "file": "index.html",
-            "name": "check_html_direct_children"
-          },
-          {
-            "file": "index.html",
-            "name": "check_tag_not_inside",
-            "calls": [
-              ["header", "main"],
-              ["footer", "main"]
-            ]
-          }
-        ]
-      },
-      "project_structure": {
-        "weight": 50,
-        "tests": [
-          {
-            "file": "all",
-            "name": "check_dir_exists",
-            "calls": [
-              ["css"],
-              ["imgs"]
-            ]
-          },
-          {
-            "file": "all",
-            "name": "check_project_structure",
-            "calls": [
-              ["css/styles.css"]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/autograder/core/schemas/config_schemas/feedback_schema.json b/autograder/core/schemas/config_schemas/feedback_schema.json
deleted file mode 100644
index e69de29..0000000
diff --git a/autograder/core/utils/__init__.py b/autograder/core/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/autograder/core/utils/result_processor.py b/autograder/core/utils/result_processor.py
deleted file mode 100644
index 048add2..0000000
--- a/autograder/core/utils/result_processor.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import json
-import os
-
-
-class ResultProcessor:
-    # Define the project root here as well to ensure paths are consistent
-    _PROJECT_ROOT = os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..'))
-
-    @staticmethod
-    def load_results(result_file_name: str) -> dict:
-        """Loads test results from a JSON file using an absolute path."""
-        # Construct the absolute path from the project root
-        print("PROJECT ROOT:", ResultProcessor._PROJECT_ROOT)
-        absolute_path = os.path.join(ResultProcessor._PROJECT_ROOT, 'validation', '__tests__','results', result_file_name)
-
-        print(f"Attempting to load results from: {absolute_path}")
-        try:
-            with open(absolute_path, "r") as f:
-                data =  json.load(f)
-            # data is a list of test result dicts
-            passed_tests = [test for test in data if test.get("status") == "passed"]
-            failed_tests = [test for test in data if test.get("status") == "failed"]
-            quantitative_results = {}  # Not present in this format
-            return passed_tests, failed_tests, quantitative_results
-        except FileNotFoundError:
-            print(
-                f"ERROR: File not found at {absolute_path}. This indicates a race condition or a file naming mismatch.")
-            raise
-        except json.JSONDecodeError:
-            print(f"ERROR: Could not decode JSON from {absolute_path}. The file might be empty or malformed.")
-            raise
\ No newline at end of file
diff --git a/autograder/builder/__init__.py b/autograder/models/__init__.py
similarity index 100%
rename from autograder/builder/__init__.py
rename to autograder/models/__init__.py
diff --git a/autograder/builder/execution_helpers/__init__.py b/autograder/models/abstract/__init__.py
similarity index 100%
rename from autograder/builder/execution_helpers/__init__.py
rename to autograder/models/abstract/__init__.py
diff --git a/autograder/models/abstract/step.py b/autograder/models/abstract/step.py
new file mode 100644
index 0000000..d9830f4
--- /dev/null
+++ b/autograder/models/abstract/step.py
@@ -0,0 +1,9 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class Step(ABC):
+    @abstractmethod
+    def execute(self, input: Any) -> Any:
+        pass
+
diff --git a/autograder/builder/models/template.py b/autograder/models/abstract/template.py
similarity index 100%
rename from autograder/builder/models/template.py
rename to autograder/models/abstract/template.py
diff --git a/autograder/builder/models/test_function.py b/autograder/models/abstract/test_function.py
similarity index 100%
rename from autograder/builder/models/test_function.py
rename to autograder/models/abstract/test_function.py
diff --git a/autograder/builder/models/criteria_tree.py b/autograder/models/criteria_tree.py
similarity index 98%
rename from autograder/builder/models/criteria_tree.py
rename to autograder/models/criteria_tree.py
index f983951..af4ec4e 100644
--- a/autograder/builder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -1,6 +1,5 @@
 from typing import List, Any
-from autograder.context import request_context
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 
 
 # Assuming TestResult is defined in a separate, importable file
@@ -120,7 +119,7 @@ def __repr__(self):
         return f"TestCategory(name='{self.name}', max_score={self.max_score}, subjects={list(self.subjects.keys()) if self.subjects else []})"
 
 
-class Criteria:
+class CriteriaTree:
     """The ROOT of the criteria tree."""
     def __init__(self, bonus_weight=0, penalty_weight=0):
         self.base = TestCategory("base")
diff --git a/autograder/builder/models/__init__.py b/autograder/models/dataclass/__init__.py
similarity index 100%
rename from autograder/builder/models/__init__.py
rename to autograder/models/dataclass/__init__.py
diff --git a/autograder/core/models/autograder_response.py b/autograder/models/dataclass/autograder_response.py
similarity index 54%
rename from autograder/core/models/autograder_response.py
rename to autograder/models/dataclass/autograder_response.py
index 5f65303..d1eb3f6 100644
--- a/autograder/core/models/autograder_response.py
+++ b/autograder/models/dataclass/autograder_response.py
@@ -1,19 +1,18 @@
-from typing import List, Optional
+from dataclasses import dataclass, field
+from typing import List
 
-from autograder.builder.models.test_function import TestFunction
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 
-from pydantic import BaseModel, Field
 
-
-class AutograderResponse(BaseModel):
+@dataclass
+class AutograderResponse:
     """
     Represents the response from the autograder.
     """
     status: str
     final_score: float = 0.0
     feedback: str = ""
-    test_report: List[TestResult] = Field(default_factory=list)
+    test_report: List[TestResult] = field(default_factory=list)
 
     def __repr__(self) -> str:
         feedback_size = len(self.feedback) if self.feedback else 0
diff --git a/autograder/core/models/feedback_preferences.py b/autograder/models/dataclass/feedback_preferences.py
similarity index 86%
rename from autograder/core/models/feedback_preferences.py
rename to autograder/models/dataclass/feedback_preferences.py
index 95d23c3..38f7719 100644
--- a/autograder/core/models/feedback_preferences.py
+++ b/autograder/models/dataclass/feedback_preferences.py
@@ -1,9 +1,10 @@
-from typing import List, Dict, Any, Optional
-from pydantic import BaseModel, Field
+from typing import List, Dict
+from dataclasses import dataclass, field
 from autograder.context import request_context
 
 
-class LearningResource(BaseModel):
+@dataclass
+class LearningResource:
     """Represents a single online resource linked to specific test names."""
     url: str
     description: str
@@ -13,44 +14,51 @@ def __repr__(self) -> str:
         return f"LearningResource(url='{self.url}', tests={self.linked_tests})"
 
 
-class GeneralPreferences(BaseModel):
+@dataclass
+class GeneralPreferences:
     """Preferences applicable to both Default and AI reporters."""
     report_title: str = "Relatório de Avaliação"
     show_score: bool = True
     show_passed_tests: bool = False
     add_report_summary: bool = True
-    online_content: List[LearningResource] = Field(default_factory=list)
+    online_content: List[LearningResource] = field(default_factory=list)
 
 
-class AiReporterPreferences(BaseModel):
+@dataclass
+class AiReporterPreferences:
     """Preferences specific to the AI Reporter."""
     provide_solutions: str = "hint"
     feedback_tone: str = "encouraging but direct"
     feedback_persona: str = "Code Buddy"
     assignment_context: str = ""
     extra_orientations: str = ""
-    submission_files_to_read: List[str] = Field(default_factory=list)
+    submission_files_to_read: List[str] = field(default_factory=list)
 
 
-class DefaultReporterPreferences(BaseModel):
+def _default_category_headers() -> Dict[str, str]:
+    """Factory function for default category headers."""
+    return {
+        "base": "✅ Requisitos Essenciais",
+        "bonus": "⭐ Pontos Extras",
+        "penalty": "❌ Pontos a Melhorar"
+    }
+
+
+@dataclass
+class DefaultReporterPreferences:
     """Preferences specific to the Default (template-based) Reporter."""
-    category_headers: Dict[str, str] = Field(
-        default_factory=lambda: {
-            "base": "✅ Requisitos Essenciais",
-            "bonus": "⭐ Pontos Extras",
-            "penalty": "❌ Pontos a Melhorar"
-        }
-    )
+    category_headers: Dict[str, str] = field(default_factory=_default_category_headers)
 
 
-class FeedbackPreferences(BaseModel):
+@dataclass
+class FeedbackPreferences:
     """
     A unified model to store all feedback preferences, including the new
     test-linked learning resources and legacy AI configurations.
     """
-    general: GeneralPreferences = Field(default_factory=GeneralPreferences)
-    ai: AiReporterPreferences = Field(default_factory=AiReporterPreferences)
-    default: DefaultReporterPreferences = Field(default_factory=DefaultReporterPreferences)
+    general: GeneralPreferences = field(default_factory=GeneralPreferences)
+    ai: AiReporterPreferences = field(default_factory=AiReporterPreferences)
+    default: DefaultReporterPreferences = field(default_factory=DefaultReporterPreferences)
 
     @classmethod
     def from_dict(cls) -> 'FeedbackPreferences':
diff --git a/autograder/models/dataclass/grading_result.py b/autograder/models/dataclass/grading_result.py
new file mode 100644
index 0000000..65c06b2
--- /dev/null
+++ b/autograder/models/dataclass/grading_result.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class GradingResult:
+    final_score: float
+    status: str
+    feedback: Optional[str] = None
+    result_tree: 'ResultTree' = None
+    # In case of error
+    error: Optional[str] = None
+    failed_at_step: Optional[str] = None
diff --git a/autograder/builder/models/param_description.py b/autograder/models/dataclass/param_description.py
similarity index 100%
rename from autograder/builder/models/param_description.py
rename to autograder/models/dataclass/param_description.py
diff --git a/autograder/core/models/result.py b/autograder/models/dataclass/result.py
similarity index 73%
rename from autograder/core/models/result.py
rename to autograder/models/dataclass/result.py
index 8cee9f0..e389dfd 100644
--- a/autograder/core/models/result.py
+++ b/autograder/models/dataclass/result.py
@@ -1,9 +1,10 @@
 from typing import List, Dict
-from autograder.core.models.test_result import TestResult
+from dataclasses import dataclass, field
+from autograder.models.dataclass.test_result import TestResult
 
-from pydantic import BaseModel, Field
 
-class Result(BaseModel):
+@dataclass
+class Result:
     """
     Represents the result of an assignment submission.
     Contains a final score generated by the Scorer class and the Graders.
@@ -12,12 +13,10 @@ class Result(BaseModel):
 
     final_score: float
     author: str
-    submission_files: Dict[str,str] = Field(default_factory=dict, alias="submission_files")
-    base_results: List[TestResult] = Field(default_factory=list)
-    bonus_results: List[TestResult] = Field(default_factory=list)
-    penalty_results: List[TestResult] = Field(default_factory=list)
-
-    model_config = {"populate_by_name": True}
+    submission_files: Dict[str, str] = field(default_factory=dict)
+    base_results: List[TestResult] = field(default_factory=list)
+    bonus_results: List[TestResult] = field(default_factory=list)
+    penalty_results: List[TestResult] = field(default_factory=list)
 
     def get_test_report(self) -> List[TestResult]:
         return self.base_results + self.bonus_results + self.penalty_results
diff --git a/autograder/models/dataclass/step_result.py b/autograder/models/dataclass/step_result.py
new file mode 100644
index 0000000..18f1c48
--- /dev/null
+++ b/autograder/models/dataclass/step_result.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+from typing import Any, Optional
+
+# This should be a generic
+@dataclass
+class StepResult:
+    data: Any
+    error: Optional[str] = None
+    failed_at_step: Optional[str] = None
+    original_input: Any = None
+
+    @property
+    def is_successful(self) -> bool:
+        return self.error is None
\ No newline at end of file
diff --git a/autograder/core/models/test_result.py b/autograder/models/dataclass/test_result.py
similarity index 83%
rename from autograder/core/models/test_result.py
rename to autograder/models/dataclass/test_result.py
index 59d5b76..d7b80a3 100644
--- a/autograder/core/models/test_result.py
+++ b/autograder/models/dataclass/test_result.py
@@ -1,14 +1,15 @@
-from pydantic import BaseModel, Field
+from dataclasses import dataclass, field
 from typing import Dict, Any
 
-class TestResult(BaseModel):
+@dataclass
+class TestResult:
     """Stores the outcome of a single test execution from the test library."""
 
     test_name: str
     score: int
     report: str
     subject_name: str = ""
-    parameters: Dict[str, Any] = Field(default_factory=dict)
+    parameters: Dict[str, Any] = field(default_factory=dict)
 
     def get_result(self, *args, **kwargs) :
         return [self]
diff --git a/autograder/pipeline.py b/autograder/pipeline.py
new file mode 100644
index 0000000..8859caa
--- /dev/null
+++ b/autograder/pipeline.py
@@ -0,0 +1,36 @@
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.step_result import StepResult
+
+
+class AutograderPipeline:
+    def __init__(self):
+        self._steps = []
+
+    def add_step(self, step: Step) -> None:
+        self._filters.append(step)
+
+    def run(self, input_data):
+        result = StepResult(data=input_data, original_input=input_data) #Initialize result object with input data
+
+        for step in self._steps:
+            if not result.is_successful:
+                break
+            try:
+                result.data = step.execute(result.data)
+            except Exception as e:
+                result.error = str(e)
+                result.failed_at_step = step.__class__.__name__
+
+        if not result.is_successful:
+            return GradingResult(
+                final_score=0.0,
+                status="error",
+                feedback=None,
+                result_tree=None,
+                error=result.error,
+                failed_at_step=result.failed_at_step,
+            )
+        else:
+            return result.data # Assuming the final step returns a GradingResult (Which is bad)
+
diff --git a/autograder/builder/template_library/__init__.py b/autograder/services/__init__.py
similarity index 100%
rename from autograder/builder/template_library/__init__.py
rename to autograder/services/__init__.py
diff --git a/autograder/builder/tree_builder.py b/autograder/services/criteria_tree_service.py
similarity index 99%
rename from autograder/builder/tree_builder.py
rename to autograder/services/criteria_tree_service.py
index 373159f..f4d5282 100644
--- a/autograder/builder/tree_builder.py
+++ b/autograder/services/criteria_tree_service.py
@@ -4,7 +4,7 @@
 from autograder.builder.models.template import Template
 from autograder.context import request_context
 
-class CriteriaTree:
+class CriteriaTreeService:
     """A factory for creating a Criteria object from a configuration dictionary."""
     @staticmethod
     def build_pre_executed_tree(template: Template) -> Criteria:
diff --git a/autograder/core/grading/grader.py b/autograder/services/grader_service.py
similarity index 97%
rename from autograder/core/grading/grader.py
rename to autograder/services/grader_service.py
index bde6b74..c1be7a2 100644
--- a/autograder/core/grading/grader.py
+++ b/autograder/services/grader_service.py
@@ -1,12 +1,11 @@
-from typing import List, Dict, Optional
+from typing import Optional
 
-from autograder.context import request_context
-from autograder.builder.tree_builder import *
-from autograder.core.models.result import Result
-from autograder.core.models.test_result import TestResult
+from autograder.services.criteria_tree_service import *
+from autograder.models.dataclass.result import Result
+from autograder.models.dataclass.test_result import TestResult
 
 
-class Grader:
+class GraderService:
     """
     Traverses a Criteria tree, executes tests, and calculates a weighted score.
     Only includes scores from categories (base, bonus, penalty) that contain tests.
diff --git a/autograder/builder/pre_flight.py b/autograder/services/pre_flight_service.py
similarity index 98%
rename from autograder/builder/pre_flight.py
rename to autograder/services/pre_flight_service.py
index 51bf8e1..9beef8f 100644
--- a/autograder/builder/pre_flight.py
+++ b/autograder/services/pre_flight_service.py
@@ -1,6 +1,6 @@
 import logging
 from autograder.context import request_context
-class PreFlight:
+class PreFlightService:
     def __init__(self,required_files=None,setup_commands=None):
         self.required_files = required_files if required_files else []
         self.setup_commands = setup_commands if setup_commands else []
diff --git a/autograder/builder/template_library/templates/__init__.py b/autograder/services/report/__init__.py
similarity index 100%
rename from autograder/builder/template_library/templates/__init__.py
rename to autograder/services/report/__init__.py
diff --git a/autograder/core/report/ai_reporter.py b/autograder/services/report/ai_reporter.py
similarity index 98%
rename from autograder/core/report/ai_reporter.py
rename to autograder/services/report/ai_reporter.py
index 374afda..93c4564 100644
--- a/autograder/core/report/ai_reporter.py
+++ b/autograder/services/report/ai_reporter.py
@@ -1,9 +1,9 @@
 from openai import OpenAI
 
 from autograder.builder.models.template import Template
-from autograder.core.models.feedback_preferences import FeedbackPreferences
-from autograder.core.report.base_reporter import BaseReporter
-from autograder.core.utils.secrets_fetcher import get_secret
+from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
+from autograder.services.report.base_reporter import BaseReporter
+from autograder.utils.secrets_fetcher import get_secret
 
 
 # Supondo que estas classes estão em seus respectivos arquivos e são importáveis
diff --git a/autograder/core/report/base_reporter.py b/autograder/services/report/base_reporter.py
similarity index 100%
rename from autograder/core/report/base_reporter.py
rename to autograder/services/report/base_reporter.py
diff --git a/autograder/core/report/default_reporter.py b/autograder/services/report/default_reporter.py
similarity index 97%
rename from autograder/core/report/default_reporter.py
rename to autograder/services/report/default_reporter.py
index 0ab74b3..476f12e 100644
--- a/autograder/core/report/default_reporter.py
+++ b/autograder/services/report/default_reporter.py
@@ -1,6 +1,6 @@
 from autograder.builder.models.template import Template
-from autograder.core.models.feedback_preferences import FeedbackPreferences
-from autograder.core.report.base_reporter import BaseReporter
+from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
+from autograder.services.report.base_reporter import BaseReporter
 
 
 class DefaultReporter(BaseReporter):
diff --git a/autograder/services/report/reporter_factory.py b/autograder/services/report/reporter_factory.py
new file mode 100644
index 0000000..752ac18
--- /dev/null
+++ b/autograder/services/report/reporter_factory.py
@@ -0,0 +1,27 @@
+from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
+from autograder.models.dataclass.result import Result
+from autograder.services.report.ai_reporter import AIReporter
+from autograder.services.report.default_reporter import DefaultReporter
+class ReporterFactory:
+
+
+    @staticmethod
+    def create_reporter_for(mode: str):
+        """Creates a reporter instance based on the specified mode."""
+        if mode == "ai":
+            return ReporterFactory.create_ai_reporter()
+        else:
+            return ReporterFactory.create_default_reporter()
+
+
+    @classmethod
+    def create_ai_reporter(cls, result: Result, feedback: FeedbackPreferences,template, quota):
+        """Creates an AIReporter instance with the students results"""
+        return AIReporter.create(result,feedback,template,quota)
+
+    @classmethod
+    def create_default_reporter(cls, result: Result,feedback: FeedbackPreferences,template):
+        """Creates a DefaultReporter instance with the students results"""
+        return DefaultReporter.create(result,feedback,template)
+
+
diff --git a/autograder/builder/template_library/library.py b/autograder/services/template_library_service.py
similarity index 88%
rename from autograder/builder/template_library/library.py
rename to autograder/services/template_library_service.py
index ff9103c..74ebb39 100644
--- a/autograder/builder/template_library/library.py
+++ b/autograder/services/template_library_service.py
@@ -1,29 +1,27 @@
 import importlib.util
 import inspect
-import tempfile
-import os
 from autograder.builder.models.template import Template
 
 
-class TemplateLibrary:
+class TemplateLibraryService:
     @staticmethod
     def get_template(template_name: str, custom_template_content: str = None, clean=False):
         if template_name == "custom":
             if not custom_template_content:
                 raise ValueError("Custom template content must be provided for 'custom' template type.")
-            return TemplateLibrary._load_custom_template_from_content(custom_template_content)
+            return TemplateLibraryService._load_custom_template_from_content(custom_template_content)
 
         if template_name == "webdev":
-            from autograder.builder.template_library.templates.web_dev import WebDevTemplate
+            from autograder.template_library.web_dev import WebDevTemplate
             return WebDevTemplate(clean)
         if template_name == "api":
-            from autograder.builder.template_library.templates.api_testing import ApiTestingTemplate
+            from autograder.template_library.api_testing import ApiTestingTemplate
             return ApiTestingTemplate(clean)
         if template_name == "essay":
-            from autograder.builder.template_library.templates.essay_grader import EssayGraderTemplate
+            from autograder.template_library.essay_grader import EssayGraderTemplate
             return EssayGraderTemplate(clean)
         if template_name == "io":
-            from autograder.builder.template_library.templates.input_output import InputOutputTemplate
+            from autograder.template_library.input_output import InputOutputTemplate
             return InputOutputTemplate(clean)
         else:
             raise ValueError(f"Template '{template_name}' not found.")
diff --git a/autograder/core/utils/upstash_driver.py b/autograder/services/upstash_driver.py
similarity index 98%
rename from autograder/core/utils/upstash_driver.py
rename to autograder/services/upstash_driver.py
index c212c73..3c2832a 100644
--- a/autograder/core/utils/upstash_driver.py
+++ b/autograder/services/upstash_driver.py
@@ -4,7 +4,7 @@
 from upstash_redis import Redis
 
 load_dotenv()
-class Driver:
+class UpstashDriver:
     def __init__(self,redis):
         self.redis = redis
 
diff --git a/autograder/core/__init__.py b/autograder/steps/__init__.py
similarity index 100%
rename from autograder/core/__init__.py
rename to autograder/steps/__init__.py
diff --git a/autograder/steps/build_tree_step.py b/autograder/steps/build_tree_step.py
new file mode 100644
index 0000000..7ca274c
--- /dev/null
+++ b/autograder/steps/build_tree_step.py
@@ -0,0 +1,13 @@
+from autograder.services.criteria_tree_service import CriteriaTreeService
+from autograder.models.criteria_tree import CriteriaTree
+from autograder.models.abstract.step import Step
+from autograder.models.abstract.template import Template
+
+
+class BuildTreeStep(Step):
+    def __init__(self, criteria_json: dict):
+        self._criteria_json = criteria_json
+        self._criteria_tree_service = CriteriaTreeService
+
+    def execute(self, input: Template) -> CriteriaTree:
+        pass
\ No newline at end of file
diff --git a/autograder/steps/export_step.py b/autograder/steps/export_step.py
new file mode 100644
index 0000000..f97f778
--- /dev/null
+++ b/autograder/steps/export_step.py
@@ -0,0 +1,9 @@
+from autograder.models.abstract.step import Step
+from autograder.models.dataclass.step_result import StepResult
+
+
+class ExporterStep(Step):
+    def __init__(self, remote_driver):
+        self._remote_driver = remote_driver # UpstashDriver
+    def execute(self, input) -> StepResult:
+        pass
\ No newline at end of file
diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
new file mode 100644
index 0000000..5727e84
--- /dev/null
+++ b/autograder/steps/feedback_step.py
@@ -0,0 +1,15 @@
+from autograder.services.report.base_reporter import BaseReporter
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.abstract.step import Step
+
+
+class FeedbackStep(Step):
+    def __init__(self,
+                 reporter_service: BaseReporter,
+                 feedback_config: dict):
+        self._reporter_service = reporter_service
+        self._feedback_config = feedback_config
+
+    def execute(self, input: GradingResult) -> GradingResult:
+        """Adds feedback to the grading result using the reporter service."""
+        pass
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
new file mode 100644
index 0000000..b98d76e
--- /dev/null
+++ b/autograder/steps/grade_step.py
@@ -0,0 +1,16 @@
+from autograder.models.criteria_tree import CriteriaTree
+from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.abstract.step import Step
+from autograder.services.grader_service import GraderService
+
+
+class GradeStep(Step):
+
+    def __init__(self):
+        self.submission_files = None # Injected at runtime
+        self._grader_service = GraderService() # GraderService here
+
+    def execute(self, input: CriteriaTree) -> GradingResult: # StepResult<GradingResult>
+        """Generate a grading result based on the criteria tree execution over a submission"""
+        pass
+
diff --git a/autograder/steps/load_template_step.py b/autograder/steps/load_template_step.py
new file mode 100644
index 0000000..b9dece4
--- /dev/null
+++ b/autograder/steps/load_template_step.py
@@ -0,0 +1,12 @@
+from autograder.services.template_library_service import TemplateLibraryService
+from autograder.models.abstract.step import Step
+
+
+class TemplateLoaderStep(Step):
+    def __init__(self, template_name: str, custom_template = None):
+        self._template_name = template_name
+        self._custom_template = custom_template
+        self._template_service = TemplateLibraryService()
+
+    def execute(self, input):
+        pass
\ No newline at end of file
diff --git a/autograder/steps/pre_flight_step.py b/autograder/steps/pre_flight_step.py
new file mode 100644
index 0000000..08d27cc
--- /dev/null
+++ b/autograder/steps/pre_flight_step.py
@@ -0,0 +1,10 @@
+from autograder.models.abstract.step import Step
+from autograder.services.pre_flight_service import PreFlightService
+
+
+class PreFlightStep(Step):
+    def __init__(self, setup_config):
+        self._setup_config = setup_config
+        self._pre_flight_service = PreFlightService
+    def execute(self, input):
+        pass
\ No newline at end of file
diff --git a/autograder/core/grading/__init__.py b/autograder/template_library/__init__.py
similarity index 100%
rename from autograder/core/grading/__init__.py
rename to autograder/template_library/__init__.py
diff --git a/autograder/builder/template_library/templates/api_testing.py b/autograder/template_library/api_testing.py
similarity index 98%
rename from autograder/builder/template_library/templates/api_testing.py
rename to autograder/template_library/api_testing.py
index 3d6d988..cf62d5b 100644
--- a/autograder/builder/template_library/templates/api_testing.py
+++ b/autograder/template_library/api_testing.py
@@ -5,8 +5,8 @@
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
 from autograder.builder.models.param_description import ParamDescription
-from autograder.core.models.test_result import TestResult
-from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor
+from autograder.models.dataclass.test_result import TestResult
+from autograder.utils.executors.sandbox_executor import SandboxExecutor
 
 # Configure basic logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -236,7 +236,7 @@ def get_test(self, name: str) -> TestFunction:
     import os
 
     # This allows the script to find the other autograder modules
-    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..'))
+    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
     if project_root not in sys.path:
         sys.path.insert(0, project_root)
 
diff --git a/autograder/builder/template_library/templates/essay_grader.py b/autograder/template_library/essay_grader.py
similarity index 98%
rename from autograder/builder/template_library/templates/essay_grader.py
rename to autograder/template_library/essay_grader.py
index 0b6328d..5dd61b0 100644
--- a/autograder/builder/template_library/templates/essay_grader.py
+++ b/autograder/template_library/essay_grader.py
@@ -1,8 +1,8 @@
-from autograder.builder.execution_helpers.AI_Executor import AiExecutor, ai_executor
+from autograder.utils.executors.ai_executor import ai_executor
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
 from autograder.builder.models.param_description import ParamDescription
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 
 # ===============================================================
 # region: TestFunction Implementations
diff --git a/autograder/builder/template_library/templates/input_output.py b/autograder/template_library/input_output.py
similarity index 97%
rename from autograder/builder/template_library/templates/input_output.py
rename to autograder/template_library/input_output.py
index bc7cac4..f9ec96e 100644
--- a/autograder/builder/template_library/templates/input_output.py
+++ b/autograder/template_library/input_output.py
@@ -1,11 +1,8 @@
-import time
-import json
-
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
 from autograder.builder.models.param_description import ParamDescription
-from autograder.core.models.test_result import TestResult
-from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor
+from autograder.models.dataclass.test_result import TestResult
+from autograder.utils.executors.sandbox_executor import SandboxExecutor
 
 
 # ===============================================================
@@ -163,7 +160,7 @@ def get_test(self, name: str) -> TestFunction:
     import sys
     import os
 
-    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..'))
+    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
     if project_root not in sys.path:
         sys.path.insert(0, project_root)
 
diff --git a/autograder/builder/template_library/templates/web_dev.py b/autograder/template_library/web_dev.py
similarity index 99%
rename from autograder/builder/template_library/templates/web_dev.py
rename to autograder/template_library/web_dev.py
index 89ae144..8cd2073 100644
--- a/autograder/builder/template_library/templates/web_dev.py
+++ b/autograder/template_library/web_dev.py
@@ -7,7 +7,7 @@
 from autograder.builder.models.test_function import TestFunction
 from autograder.builder.models.param_description import ParamDescription
 
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 
 
 # ===============================================================
diff --git a/autograder/core/models/__init__.py b/autograder/utils/__init__.py
similarity index 100%
rename from autograder/core/models/__init__.py
rename to autograder/utils/__init__.py
diff --git a/autograder/core/report/__init__.py b/autograder/utils/executors/__init__.py
similarity index 100%
rename from autograder/core/report/__init__.py
rename to autograder/utils/executors/__init__.py
diff --git a/autograder/builder/execution_helpers/AI_Executor.py b/autograder/utils/executors/ai_executor.py
similarity index 99%
rename from autograder/builder/execution_helpers/AI_Executor.py
rename to autograder/utils/executors/ai_executor.py
index 0fa6178..343024f 100644
--- a/autograder/builder/execution_helpers/AI_Executor.py
+++ b/autograder/utils/executors/ai_executor.py
@@ -1,11 +1,11 @@
 import json
 from typing import List
 from openai import OpenAI
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 from pydantic import BaseModel, Field
 from autograder.context import request_context
 import dotenv
-from autograder.core.utils.secrets_fetcher import get_secret
+from autograder.utils.secrets_fetcher import get_secret
 
 dotenv.load_dotenv()  # Load environment variables from .env file
 
diff --git a/autograder/builder/execution_helpers/sandbox_executor.py b/autograder/utils/executors/sandbox_executor.py
similarity index 100%
rename from autograder/builder/execution_helpers/sandbox_executor.py
rename to autograder/utils/executors/sandbox_executor.py
diff --git a/autograder/core/utils/secrets_fetcher.py b/autograder/utils/secrets_fetcher.py
similarity index 100%
rename from autograder/core/utils/secrets_fetcher.py
rename to autograder/utils/secrets_fetcher.py
diff --git a/connectors/adapters/__init__.py b/connectors/adapters/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/connectors/adapters/api/__init__.py b/connectors/adapters/api/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/connectors/adapters/api/api_adapter.py b/connectors/adapters/api/api_adapter.py
deleted file mode 100644
index 70d4934..0000000
--- a/connectors/adapters/api/api_adapter.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import inspect
-import textwrap
-from typing import List, Optional, Dict, Any
-from fastapi import UploadFile
-
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-import json
-from connectors.port import Port
-import logging
-from autograder.builder.template_library.library import TemplateLibrary
-from autograder.context import request_context
-
-class ApiAdapter(Port):
-
-    def export_results(self):
-        """
-        Prepares the results of the autograding workfow as an API response.
-        Also retrieves important data from the request (student_credentiaals)
-        """
-        if not self.autograder_response:
-            raise Exception("No autograder response available. Please run the autograder first.")
-
-        # Prepare the API response
-        test_report = self.autograder_response.test_report
-        response = {
-            "server_status": "Server connection happened successfully",
-            "autograding_status": self.autograder_response.status,
-            "final_score": self.autograder_response.final_score,
-            "feedback": self.autograder_response.feedback,
-            "test_report": [test_result.to_dict() for test_result in test_report] if test_report else [],
-        }
-
-        return response
-
-    async def create_request(self,
-                       submission_files: List[UploadFile],
-                       assignment_config: AssignmentConfig,
-                       student_name,
-                       student_credentials,
-                       include_feedback=False,
-                       feedback_mode="default",
-                       openai_key=None,
-                       redis_url=None,
-                       redis_token=None):
-        submission_files_dict = {}
-        for submission_file in submission_files:
-            if ".git" in submission_file.filename:
-                continue
-            submission_content = await submission_file.read()
-            submission_files_dict[submission_file.filename] =  submission_content.decode("utf-8")
-        self.autograder_request = AutograderRequest(
-            submission_files=submission_files_dict,
-            assignment_config=assignment_config,
-            student_name=student_name,
-            student_credentials=student_credentials,
-            include_feedback=include_feedback,
-            feedback_mode=feedback_mode,
-            openai_key=openai_key,
-            redis_url=redis_url,
-            redis_token=redis_token,
-        )
-
-
-    async def load_assignment_config(self, template: str, criteria: UploadFile, feedback: UploadFile,
-                               setup: Optional[UploadFile] = None, custom_template: Optional[UploadFile] = None) -> AssignmentConfig:
-        """
-        Loads the assignment configuration based on the provided template preset.
-        """
-        logger = logging.getLogger(__name__)
-        try:
-            # Read and parse template name
-            template_name = template
-            logger.info(f"Template name: {template_name}")
-
-            # Loads the raw json strings (template,criteria,feedback and setup) into dictionaries
-            criteria_content = await criteria.read()
-            criteria_dict = json.loads(criteria_content.decode("utf-8")) if criteria else None
-            logger.info(f"Criteria loaded: {criteria_dict is not None}")
-
-            feedback_content = await feedback.read()
-            feedback_dict = json.loads(feedback_content.decode("utf-8")) if feedback else None
-            logger.info(f"Feedback config loaded: {feedback_dict is not None}")
-
-            setup_dict = None
-            if setup:
-                setup_content = await setup.read()
-                setup_dict = json.loads(setup_content.decode("utf-8")) if setup else None
-                logger.info(f"Setup config loaded: {setup_dict is not None}")
-            custom_template_str = None
-            if custom_template:
-                custom_template_content = await custom_template.read()
-                custom_template_str = custom_template_content.decode("utf-8")
-
-            return AssignmentConfig(criteria=criteria_dict, feedback=feedback_dict, setup=setup_dict,
-                                    template=template_name, custom_template_str = custom_template_str)
-
-        except json.JSONDecodeError as e:
-            logger.error(f"Invalid JSON in configuration files: {e}")
-            raise ValueError(f"Invalid JSON format in configuration files: {e}")
-        except UnicodeDecodeError as e:
-            logger.error(f"Encoding error reading configuration files: {e}")
-            raise ValueError(f"Unable to decode configuration files: {e}")
-
-    def get_template_info(self,template_name: str) -> Dict[str, Any]:
-        """
-        Retrieves a dictionary containing all the information of a Template,
-        including its name, description, and full details for each test function
-        (name, description, parameters, and source code).
-        """
-
-        request_context.set_request(AutograderRequest.build_empty_request())
-        print("REQUEST_CONTEXT:", request_context.get_request())
-        # 1. Retrieve an instance of the template from the library
-        return TemplateLibrary.get_template_info(template_name)
-        
-
-    
-
-if __name__ == "__main__":
-    adapter = ApiAdapter()
-    template_info = adapter.get_template_info("web dev")
-    print(template_info)
-
diff --git a/connectors/adapters/github_action_adapter/__init__.py b/connectors/adapters/github_action_adapter/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/connectors/adapters/github_action_adapter/github_adapter.py b/connectors/adapters/github_action_adapter/github_adapter.py
deleted file mode 100644
index 2684544..0000000
--- a/connectors/adapters/github_action_adapter/github_adapter.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import json
-import os
-import shutil
-
-from connectors.models.assignment_config import AssignmentConfig
-from connectors.models.autograder_request import AutograderRequest
-from connectors.port import Port
-from github import Github
-from github.GithubException import UnknownObjectException
-
-class GithubAdapter(Port):
-    def __init__(self,github_token,app_token):
-        super().__init__()
-        self.github_token = github_token
-        self.app_token = app_token
-        self.repo = self.get_repository(app_token)
-
-    def get_repository(self,app_token):
-        try:
-            repos = os.getenv("GITHUB_REPOSITORY")
-            g = Github(app_token)
-            repo = g.get_repo(repos)
-            print("This repo is: ", repo)
-            return repo
-        except:
-            raise Exception("Failed to get repository. Please check your GitHub token and repository settings.")
-
-    def notify_classroom(self):
-        final_score = self.autograder_response.final_score
-        if final_score < 0 or final_score > 100:
-            print("Invalid final score. It should be between 0 and 100.")
-            return
-
-            # Retrieve the GitHub token and repository information from environment variables
-
-        repo_name = os.getenv("GITHUB_REPOSITORY")
-        if not repo_name:
-            print("Repository information is missing.")
-            return
-
-        token = os.getenv("GITHUB_TOKEN")
-        # Create the GitHub client using the token
-        g = Github(token)
-        repo = g.get_repo(repo_name)
-
-        # Get the workflow run ID
-        run_id = os.getenv("GITHUB_RUN_ID")
-        if not run_id:
-            print("Run ID is missing.")
-            return
-
-        # Fetch the workflow run
-        workflow_run = repo.get_workflow_run(int(run_id))
-
-        # Find the check suite run ID
-        check_suite_url = workflow_run.check_suite_url
-        check_suite_id = int(check_suite_url.split('/')[-1])
-
-        # Get the check runs for this suite
-        check_runs = repo.get_check_suite(check_suite_id)
-        check_run = next((run for run in check_runs.get_check_runs() if run.name == "grading"), None)
-        if not check_run:
-            print("Check run not found.")
-            return
-        # Create a summary for the final grade
-        text = f"Final Score: {format(final_score, '.2f')}/100"
-
-        # Update the check run with the final score
-        check_run.edit(
-            name="Autograding",
-            output={
-                "title": "Autograding Result",
-                "summary": text,
-                "text": json.dumps({"totalPoints": format(final_score, '.2f'), "maxPoints": 100}),
-                "annotations": [{
-                    "path": ".github",
-                    "start_line": 1,
-                    "end_line": 1,
-                    "annotation_level": "notice",
-                    "message": text,
-                    "title": "Autograding complete"
-                }]
-            }
-        )
-
-        print(f"Final grade updated: {format(final_score, '.2f')}/100")
-
-    def commit_feedback(self):
-        file_path = "relatorio.md"
-        file_sha = None
-        commit_message = ""
-        # If the autograder_request exists and include_feedback is explicitly False,
-        # skip committing the relatorio.md file.
-        req = getattr(self, 'autograder_request', None)
-        if req is not None and not getattr(req, 'include_feedback', False):
-            print("Feedback generation disabled (include_feedback=False), skipping commit of relatorio.md.")
-            return
-
-        # Safely get feedback content (may be None or autograder_response may not exist)
-        new_content = None
-        resp = getattr(self, 'autograder_response', None)
-        if resp is not None:
-            new_content = getattr(resp, 'feedback', None)
-        # 1. Tente obter o arquivo para ver se ele já existe
-        try:
-            file = self.repo.get_contents(file_path)
-            file_sha = file.sha
-            print(f"Arquivo '{file_path}' encontrado. Preparando para atualizar...")
-        except UnknownObjectException:
-            print(f"Arquivo '{file_path}' não encontrado. Preparando para criar...")
-            pass
-
-        # 2. Fora do try/except, decida se cria ou atualiza
-        if file_sha:
-            commit_message = f"Atualizando relatório: {file_path}"
-            self.repo.update_file(path=file_path, message=commit_message, content=new_content, sha=file_sha)
-            print("Relatório atualizado com sucesso.")
-        else:
-            commit_message = f"Criando relatório: {file_path}"
-            self.repo.create_file(path=file_path, message=commit_message, content=new_content)
-            print("Relatório criado com sucesso.")
-    def export_results(self):
-        self.commit_feedback()
-        self.notify_classroom()
-
-
-
-    def get_submission_files(self):
-
-        base_path = os.getenv("GITHUB_WORKSPACE", ".")
-        submission_path = os.path.join(base_path, 'submission')
-        submission_files_dict = {}
-
-        # take all files in the submission directory and add them to the submission_files_dict
-        for root, dirs, files in os.walk(submission_path):
-         # Skip .git directory
-         if '.git' in dirs:
-             dirs.remove('.git')
-         if '.github' in dirs:
-             dirs.remove('.github')
-         for file in files:
-             # Full path to the file
-             file_path = os.path.join(root, file)
-
-             # Key: Path relative to the starting directory to ensure uniqueness
-             relative_path = os.path.relpath(file_path, submission_path)
-
-             try:
-                 with open(file_path, "r", encoding='utf-8', errors='ignore') as f:
-                     print("Adding file to submission_files_dict: ", relative_path)
-                     # Use the unique relative_path as the key
-                     submission_files_dict[relative_path] = f.read()
-             except Exception as e:
-                 print(f"Could not read file {file_path}: {e}")
-
-        return submission_files_dict
-
-    def create_request(self, submission_files, assignment_config, student_name, student_credentials, feedback_mode="default", openai_key=None, redis_url=None, redis_token=None, include_feedback = False):
-        """
-        Creates an AutograderRequest object with the provided parameters.
-        """
-        print("Getting submission files from the repository...")
-        submission_files_dict = self.get_submission_files()
-        print(submission_files_dict)
-        print(f"Creating AutograderRequest with {feedback_mode} feedback mode")
-        self.autograder_request = AutograderRequest(
-            submission_files=submission_files_dict,
-            assignment_config=assignment_config,
-            student_name=student_name,
-            student_credentials=student_credentials,
-            include_feedback=include_feedback,
-            feedback_mode=feedback_mode,
-            openai_key=openai_key,
-            redis_url=redis_url,
-            redis_token=redis_token,
-        )
-        print(f"AutograderRequest created with {self.autograder_request.feedback_mode} feedback mode")
-
-    def create_assigment_config(self,template_preset):
-        """
-        Looks inside $GITHUB_WORKSPACE/submission/.github/autograder for the criteria.json, feedback.json and setup.json files.
-        """
-        base_path = os.getenv("GITHUB_WORKSPACE", ".")
-        submission_path = os.path.join(base_path, 'submission')
-        configuration_path = os.path.join(submission_path, '.github','autograder')
-
-        criteria_path = os.path.join(configuration_path, 'criteria.json')
-        if not os.path.exists(criteria_path):
-            raise FileNotFoundError("criteria.json file not found in the autograder configuration directory.")
-        feedback_path = os.path.join(configuration_path, 'feedback.json')
-        if not os.path.exists(feedback_path):
-            raise FileNotFoundError("feedback.json file not found in the autograder configuration directory.")
-        setup_path = os.path.join(configuration_path, 'setup.json')
-
-
-        criteria_dict = None
-        feedback_dict = None
-        setup_dict = None
-
-        with open(criteria_path, 'r', encoding='utf-8') as f:
-            criteria_dict = json.load(f)
-        print("Criteria loaded successfully.")
-
-
-
-        with open(feedback_path, 'r', encoding='utf-8') as f:
-            feedback_dict = json.load(f)
-        print("Feedback config loaded successfully.")
-
-
-
-        with open(setup_path, 'r', encoding='utf-8') as f:
-            setup_dict = json.load(f)
-        print("Setup config loaded successfully.")
-
-        custom_template_str = None
-        if template_preset == "custom":
-            custom_template_path = os.path.join(configuration_path, 'template.py')
-            if not os.path.exists(custom_template_path):
-                raise FileNotFoundError("Custom template file 'template.py' not found in the autograder configuration directory.")
-            with open(custom_template_path, 'r', encoding='utf-8') as f:
-                custom_template_str = f.read()
-            print("Custom template loaded successfully.")
-
-        assignment_config = AssignmentConfig(
-            template=template_preset,
-            criteria=criteria_dict,
-            feedback=feedback_dict,
-            setup=setup_dict,
-            custom_template_str=custom_template_str,
-        )
-        return assignment_config
-
-
-    @classmethod
-    def create(cls,test_framework,github_author,feedback_type,github_token,app_token,openai_key=None,redis_url=None,redis_token=None):
-        response = cls(test_framework,github_author)
-        response.get_repository(app_token)
-        return response
\ No newline at end of file
diff --git a/connectors/adapters/api/api_entrypoint.py b/connectors/api_connector.py
similarity index 99%
rename from connectors/adapters/api/api_entrypoint.py
rename to connectors/api_connector.py
index 778db74..8af0bcc 100644
--- a/connectors/adapters/api/api_entrypoint.py
+++ b/connectors/api_connector.py
@@ -7,7 +7,7 @@
 from connectors.models.assignment_config import AssignmentConfig
 import uvicorn
 
-from connectors.adapters.api.api_adapter import ApiAdapter
+from connectors import ApiAdapter
 # Initialize the FastAPI app
 app = FastAPI(
     title="WebTech Autograder API Service",
diff --git a/connectors/adapters/github_action_adapter/github_entrypoint.py b/connectors/github_connector.py
similarity index 97%
rename from connectors/adapters/github_action_adapter/github_entrypoint.py
rename to connectors/github_connector.py
index 77505d4..de22ca5 100644
--- a/connectors/adapters/github_action_adapter/github_entrypoint.py
+++ b/connectors/github_connector.py
@@ -1,5 +1,5 @@
 from argparse import ArgumentParser
-from connectors.adapters.github_action_adapter.github_adapter import GithubAdapter
+from connectors import GithubAdapter
 from connectors.models.assignment_config import AssignmentConfig
 parser = ArgumentParser(description="GitHub Action Adapter for Autograder")
 parser.add_argument("--github-token", type=str, required=True, help="GitHub Token")
diff --git a/connectors/models/__init__.py b/connectors/models/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/connectors/models/assignment_config.py b/connectors/models/assignment_config.py
deleted file mode 100644
index c106745..0000000
--- a/connectors/models/assignment_config.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import os
-from typing import Dict, Any, Optional
-from pydantic import BaseModel, Field
-
-
-class AssignmentConfig(BaseModel):
-    template: str = "custom"
-    criteria: Dict[str, Any]
-    feedback: Dict[str, Any]
-    setup: Optional[Dict[str, Any]] = None
-    custom_template: Optional[str] = None
-    
-    def __str__(self) -> str:
-        """
-        Returns a string representation of the AssignmentConfig object.
-        """
-        criteria = "[Loaded]" if self.criteria else "[Not Loaded]"
-        feedback = "[Loaded]" if self.feedback else "[Not Loaded]"
-        setup = "[Loaded]" if self.setup else "[Not Loaded]"
-        template_str = "[Loaded]" if self.custom_template else "[Not Loaded]"
-        
-        return (
-            f"AssignmentConfig(template={self.template}, criteria={criteria}, "
-            f"feedback={feedback}, setup={setup}, custom_template_str={template_str})"
-        )
-
-
-if __name__ == "__main__":
-    # Example usage
-    config = AssignmentConfig(
-        template="custom",
-        criteria={"test_case_1": "description"},
-        feedback={"style": "detailed"},
-        setup={"environment": "python3.8"},
-        custom_template="def custom_function(): pass"
-    )
-    print(config)
\ No newline at end of file
diff --git a/connectors/models/autograder_request.py b/connectors/models/autograder_request.py
deleted file mode 100644
index fce0cc6..0000000
--- a/connectors/models/autograder_request.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from typing import Dict, Any, Optional
-from pydantic import BaseModel, Field
-from connectors.models.assignment_config import AssignmentConfig
-
-
-class AutograderRequest(BaseModel):
-    submission_files: Dict[str, Any]
-    assignment_config: AssignmentConfig
-    student_name: str
-    student_credentials: Optional[str] = None
-    include_feedback: bool = False
-    feedback_mode: str = "default"
-    openai_key: Optional[str] = None
-    redis_url: Optional[str] = None
-    redis_token: Optional[str] = None
-    criteria_tree: Optional[Any] = None
-    reporter: Optional[Any] = None
-    feedback_report: Optional[Any] = None
-    
-    def __str__(self) -> str:
-        stri = f"{len(self.submission_files)} submission files.\n"
-        stri += f"Assignment config: {self.assignment_config}\n"
-        stri += f"Student name: {self.student_name}\n"
-        stri += f"Feedback mode: {self.feedback_mode}\n"
-        return stri
-    
-    @classmethod
-    def build_empty_request(cls) -> "AutograderRequest":
-        return cls(
-            submission_files={},
-            assignment_config=AssignmentConfig(
-                criteria={}, 
-                feedback={}, 
-                setup={}, 
-                template=""
-            ),
-            student_name="",
-            include_feedback=False
-        )
\ No newline at end of file
diff --git a/connectors/port.py b/connectors/port.py
deleted file mode 100644
index 7198bef..0000000
--- a/connectors/port.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from abc import ABC, abstractmethod
-
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.autograder_facade import Autograder
-
-
-class Port(ABC):
-    """
-    Abstract Port class that defines the accepted interface for the core system communication.
-    """
-    def __init__(self):
-        self.autograder_request = None
-        self.autograder_response = None
-
-    def run_autograder(self):
-        try:
-            response = Autograder.grade(self.autograder_request)
-            self.autograder_response = response
-            return self
-        except Exception as e:
-            raise Exception(f"Error running autograder: {e}") from e
-
-
-    @abstractmethod
-    def export_results(self):
-        """
-        Abstract method to export the results of the autograder.
-        This method should be implemented by the concrete Port classes.
-        """
-        pass
-
-    @abstractmethod
-    def create_request(self,
-                       submission_files,
-                       assignment_config: AssignmentConfig,
-                       student_name,
-                       student_credentials,
-                       feedback_mode="default",
-                       openai_key=None,
-                       redis_url=None,
-                       redis_token=None):
-        """
-        Abstract method to create an AutograderRequest object.
-        This method should be implemented by the concrete Port classes.
-        """
-        pass
diff --git a/docs/system/creating_assignments.md b/docs/system/creating_assignments.md
index 95970b7..ebd7639 100644
--- a/docs/system/creating_assignments.md
+++ b/docs/system/creating_assignments.md
@@ -128,7 +128,8 @@ If you need complete control over grading logic, you can create a `template.py`
 ```python
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
+
 
 # ===============================================================
 # region: TestFunction Implementations
@@ -138,18 +139,18 @@ class HasRequiredFile(TestFunction):
     @property
     def name(self):
         return "has_required_file"
-    
+
     @property
     def description(self):
         return "Checks if a required file exists in the submission"
-    
+
     @property
     def parameter_description(self):
         return {
             "file_path": "Path to the required file",
             "file_name": "Name of the required file"
         }
-    
+
     def execute(self, file_path: str, file_name: str) -> TestResult:
         import os
         exists = os.path.exists(file_path)
@@ -162,18 +163,18 @@ class CheckMinimumLines(TestFunction):
     @property
     def name(self):
         return "check_minimum_lines"
-    
+
     @property
     def description(self):
         return "Checks if a file has at least a minimum number of lines"
-    
+
     @property
     def parameter_description(self):
         return {
             "file_content": "Content of the file to check",
             "min_lines": "Minimum number of lines required"
         }
-    
+
     def execute(self, file_content: str, min_lines: int) -> TestResult:
         lines = file_content.strip().split('\n')
         actual_lines = len([line for line in lines if line.strip()])
@@ -190,37 +191,37 @@ class CustomAssignmentTemplate(Template):
     """
     A custom template for a specific assignment.
     """
-    
+
     @property
     def template_name(self):
         return "Custom Assignment Template"
-    
+
     @property
     def template_description(self):
         return "Custom grading template for specific assignment requirements"
-    
+
     @property
     def requires_execution_helper(self) -> bool:
         return False
-    
+
     @property
     def execution_helper(self):
         return None
-    
+
     @property
     def requires_pre_executed_tree(self) -> bool:
         return False
-    
+
     def __init__(self):
         self.tests = {
             "has_required_file": HasRequiredFile(),
             "check_minimum_lines": CheckMinimumLines(),
             # Add more custom tests here
         }
-    
+
     def stop(self):
         pass
-    
+
     def get_test(self, name: str) -> TestFunction:
         """
         Retrieves a specific test function instance from the template.
@@ -424,10 +425,11 @@ submission/
 ```python
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 from bs4 import BeautifulSoup
 import re
 
+
 # ===============================================================
 # region: TestFunction Implementations
 # ===============================================================
@@ -436,28 +438,28 @@ class CheckResponsiveImages(TestFunction):
     @property
     def name(self):
         return "check_responsive_images"
-    
+
     @property
     def description(self):
         return "Checks if images use responsive attributes"
-    
+
     @property
     def parameter_description(self):
         return {
             "html_content": "The HTML content to analyze",
             "min_count": "Minimum number of responsive images required"
         }
-    
+
     def execute(self, html_content: str, min_count: int) -> TestResult:
         soup = BeautifulSoup(html_content, 'html.parser')
         images = soup.find_all('img')
         responsive_count = 0
-        
+
         for img in images:
             # Check for responsive attributes
             if img.get('srcset') or 'responsive' in img.get('class', []):
                 responsive_count += 1
-        
+
         score = min(100, int((responsive_count / min_count) * 100)) if min_count > 0 else 100
         report = f"Found {responsive_count} of {min_count} required responsive images."
         return TestResult(self.name, score, report, parameters={"min_count": min_count})
@@ -467,23 +469,23 @@ class CheckMediaQueries(TestFunction):
     @property
     def name(self):
         return "check_media_queries"
-    
+
     @property
     def description(self):
         return "Checks if CSS contains media queries for responsive design"
-    
+
     @property
     def parameter_description(self):
         return {
             "css_content": "The CSS content to analyze",
             "min_breakpoints": "Minimum number of breakpoints required"
         }
-    
+
     def execute(self, css_content: str, min_breakpoints: int) -> TestResult:
         pattern = r'@media\s*\([^)]+\)'
         matches = re.findall(pattern, css_content)
         breakpoint_count = len(matches)
-        
+
         score = min(100, int((breakpoint_count / min_breakpoints) * 100)) if min_breakpoints > 0 else 100
         report = f"Found {breakpoint_count} of {min_breakpoints} required media queries."
         return TestResult(self.name, score, report, parameters={"min_breakpoints": min_breakpoints})
@@ -497,36 +499,36 @@ class ResponsiveLandingPageTemplate(Template):
     """
     Custom template for responsive landing page assignment.
     """
-    
+
     @property
     def template_name(self):
         return "Responsive Landing Page Template"
-    
+
     @property
     def template_description(self):
         return "Evaluates responsive design implementation in landing pages"
-    
+
     @property
     def requires_execution_helper(self) -> bool:
         return False
-    
+
     @property
     def execution_helper(self):
         return None
-    
+
     @property
     def requires_pre_executed_tree(self) -> bool:
         return False
-    
+
     def __init__(self):
         self.tests = {
             "check_responsive_images": CheckResponsiveImages(),
             "check_media_queries": CheckMediaQueries(),
         }
-    
+
     def stop(self):
         pass
-    
+
     def get_test(self, name: str) -> TestFunction:
         """
         Retrieves a specific test function instance from the template.
diff --git a/docs/system/execution/execution_helpers.md b/docs/system/execution/execution_helpers.md
index 24171f5..36bc708 100644
--- a/docs/system/execution/execution_helpers.md
+++ b/docs/system/execution/execution_helpers.md
@@ -56,7 +56,7 @@ Important details:
 Usage example (conceptual):
 
 ```py
-from autograder.builder.execution_helpers.AI_Executor import AiExecutor
+from autograder.utils.executors.ai_executor import AiExecutor
 
 ai = AiExecutor()
 ai.send_submission_files({'main.py': 'print("hi")'})
@@ -147,7 +147,7 @@ Important configuration:
 Usage example (conceptual):
 
 ```py
-from autograder.builder.execution_helpers.sandbox_executor import SandboxExecutor
+from autograder.utils.executors.sandbox_executor import SandboxExecutor
 
 # Create and start from request context
 sandbox = SandboxExecutor.start()
diff --git a/tests/data/custom_template/custom_template.py b/tests/data/custom_template/custom_template.py
index 18f69df..56e0f7c 100644
--- a/tests/data/custom_template/custom_template.py
+++ b/tests/data/custom_template/custom_template.py
@@ -1,7 +1,7 @@
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
 from autograder.builder.models.param_description import ParamDescription
-from autograder.core.models.test_result import TestResult
+from autograder.models.dataclass.test_result import TestResult
 from autograder.context import request_context
 
 
diff --git a/tests/data/web_dev/criteria.json b/tests/data/web_dev/criteria.json
index b243029..ab144a7 100644
--- a/tests/data/web_dev/criteria.json
+++ b/tests/data/web_dev/criteria.json
@@ -4,7 +4,8 @@
     "subjects": {
       "html_structure": {
         "weight": 40,
-        "tests": [
+        "tests":
+        [
           {
             "name": "has_tag",
             "file": "index.html",
diff --git a/tests/unit/builder/test_tree.py b/tests/unit/builder/test_tree.py
index 3db3717..51b74ff 100644
--- a/tests/unit/builder/test_tree.py
+++ b/tests/unit/builder/test_tree.py
@@ -1,6 +1,6 @@
 import unittest
 # Assuming your tree builder and models are in this path
-from autograder.builder.tree_builder import CriteriaTree, Criteria, Subject, Test, TestCall
+from autograder.services.criteria_tree_service import CriteriaTree, Criteria, Subject, Test, TestCall
 
 class TestCriteriaTree(unittest.TestCase):
 
diff --git a/tests/unit/core/reporter/test_default_reporter.py b/tests/unit/core/reporter/test_default_reporter.py
index 2e9d94b..36f09e8 100644
--- a/tests/unit/core/reporter/test_default_reporter.py
+++ b/tests/unit/core/reporter/test_default_reporter.py
@@ -1,8 +1,8 @@
 import unittest
-from autograder.core.report.default_reporter import DefaultReporter
-from autograder.core.models.result import Result
-from autograder.core.models.test_result import TestResult
-from autograder.core.models.feedback_preferences import FeedbackPreferences
+from autograder.services.report import DefaultReporter
+from autograder.models.dataclass.result import Result
+from autograder.models.dataclass.test_result import TestResult
+from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
 
 
 class TestDefaultReporter(unittest.TestCase):
diff --git a/tests/unit/core/test_grader.py b/tests/unit/core/test_grader.py
index a1a1adb..ac971f0 100644
--- a/tests/unit/core/test_grader.py
+++ b/tests/unit/core/test_grader.py
@@ -1,13 +1,12 @@
 import unittest
-from typing import Dict, List, Any
 
 # Assuming these classes are in your project structure
-from autograder.builder.tree_builder import CriteriaTree, Criteria, Subject, Test, TestCall
-from autograder.core.models.result import Result
-from autograder.core.models.test_result import TestResult
+from autograder.services.criteria_tree_service import CriteriaTree, Criteria, Subject, Test, TestCall
+from autograder.models.dataclass.result import Result
+from autograder.models.dataclass.test_result import TestResult
 from autograder.builder.models.template import Template
 from autograder.builder.models.test_function import TestFunction
-from autograder.core.grading.grader import Grader
+from autograder.services.grader_service import Grader
 
 # ===============================================================
 # Mock Template Library based on the new TestFunction model
diff --git a/tests/unit/test_facade.py b/tests/unit/test_facade.py
index 5057222..4c39c67 100644
--- a/tests/unit/test_facade.py
+++ b/tests/unit/test_facade.py
@@ -4,8 +4,8 @@
 from autograder.autograder_facade import Autograder
 from connectors.models.autograder_request import AutograderRequest
 from connectors.models.assignment_config import AssignmentConfig
-from autograder.core.models.autograder_response import AutograderResponse
-from autograder.core.models.result import Result
+from autograder.models.dataclass.autograder_response import AutograderResponse
+from autograder.models.dataclass.result import Result
 
 
 class TestAutograderFacade(unittest.TestCase):

From 6e1e93ec72156007c127246431b197d061f73a1a Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Wed, 24 Dec 2025 13:18:58 -0300
Subject: [PATCH 02/49] feat: first glance of pipeline architecture

---
 autograder/autograder.py                      |   2 +-
 autograder/models/abstract/step.py            |   4 +-
 .../models/dataclass/preflight_error.py       |  24 ++
 autograder/models/dataclass/step_result.py    |  19 +-
 autograder/pipeline.py                        |  15 +-
 autograder/services/pre_flight_service.py     |  84 +++++--
 autograder/steps/pre_flight_step.py           |  66 ++++-
 tests/autograder/__init__.py                  |   0
 tests/autograder/builder/__init__.py          |   0
 tests/autograder/builder/test_tree.py         |   0
 tests/autograder/core/__init__.py             |   0
 tests/autograder/core/reporter/__init__.py    |   0
 .../core/reporter/test_ai_reporter.py         |   0
 .../core/reporter/test_default_reporter.py    |   0
 tests/autograder/core/test_grader.py          |   0
 tests/autograder/test_facade.py               | 236 ++++++++++++++++++
 tests/data/curl_examples.sh                   |   0
 tests/data/custom_template/criteria.json      |  24 --
 tests/data/custom_template/main.py            |  12 -
 tests/data/essay/criteria.json                |  25 --
 tests/data/essay/essay.txt                    |   1 -
 tests/data/essay/feedback.json                |   5 -
 tests/data/input_output/criteria.json         |  33 ---
 tests/data/input_output/requirements.txt      |   1 -
 tests/data/input_output/setup.json            |   7 -
 tests/data/web_dev/style.css                  |  44 ----
 tests/playroom/run_all_playrooms.py           | 160 ------------
 tests/unit/test_preflight_step.py             |  87 +++++++
 28 files changed, 500 insertions(+), 349 deletions(-)
 create mode 100644 autograder/models/dataclass/preflight_error.py
 create mode 100644 tests/autograder/__init__.py
 create mode 100644 tests/autograder/builder/__init__.py
 create mode 100644 tests/autograder/builder/test_tree.py
 create mode 100644 tests/autograder/core/__init__.py
 create mode 100644 tests/autograder/core/reporter/__init__.py
 create mode 100644 tests/autograder/core/reporter/test_ai_reporter.py
 create mode 100644 tests/autograder/core/reporter/test_default_reporter.py
 create mode 100644 tests/autograder/core/test_grader.py
 create mode 100644 tests/autograder/test_facade.py
 mode change 100755 => 100644 tests/data/curl_examples.sh
 create mode 100644 tests/unit/test_preflight_step.py

diff --git a/autograder/autograder.py b/autograder/autograder.py
index ef92735..a082d0d 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -27,7 +27,7 @@ def build_pipeline(
     if include_feedback:
         reporter_service = ReporterFactory.create_reporter_for(feedback_mode)
         pipeline.add_step(FeedbackStep(reporter_service,feedback_config))
-    pipeline.add_step(ExporterStep(UpstashDriver)) # Placeholder for remote driver
+    pipeline.add_step(ExporterStep(UpstashDriver))
     return pipeline
 
 
diff --git a/autograder/models/abstract/step.py b/autograder/models/abstract/step.py
index d9830f4..eef6a9b 100644
--- a/autograder/models/abstract/step.py
+++ b/autograder/models/abstract/step.py
@@ -1,9 +1,11 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
+from autograder.models.dataclass.step_result import StepResult
+
 
 class Step(ABC):
     @abstractmethod
-    def execute(self, input: Any) -> Any:
+    def execute(self, input: Any) -> StepResult[Any]:
         pass
 
diff --git a/autograder/models/dataclass/preflight_error.py b/autograder/models/dataclass/preflight_error.py
new file mode 100644
index 0000000..7ddb370
--- /dev/null
+++ b/autograder/models/dataclass/preflight_error.py
@@ -0,0 +1,24 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional
+
+
+class PreflightCheckType(Enum):
+    """Types of preflight checks that can fail."""
+    FILE_CHECK = "file_check"
+    SETUP_COMMAND = "setup_command"
+
+
+@dataclass
+class PreflightError:
+    """
+    Represents an error found during pre-flight checks.
+
+    Attributes:
+        type: The type of error (file check or setup command)
+        message: The error message describing what went wrong
+        details: Optional additional context about the error
+    """
+    type: PreflightCheckType
+    message: str
+    details: Optional[dict] = None
diff --git a/autograder/models/dataclass/step_result.py b/autograder/models/dataclass/step_result.py
index 18f1c48..a64686b 100644
--- a/autograder/models/dataclass/step_result.py
+++ b/autograder/models/dataclass/step_result.py
@@ -1,14 +1,23 @@
 from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any, Optional, TypeVar, Generic
+from enum import Enum
+
+T = TypeVar('T')
+
+
+class StepStatus(Enum):
+    SUCCESS = "success"
+    FAIL = "fail"
+
 
-# This should be a generic
 @dataclass
-class StepResult:
-    data: Any
+class StepResult(Generic[T]):
+    data: T
+    status: StepStatus = StepStatus.SUCCESS
     error: Optional[str] = None
     failed_at_step: Optional[str] = None
     original_input: Any = None
 
     @property
     def is_successful(self) -> bool:
-        return self.error is None
\ No newline at end of file
+        return self.status == StepStatus.SUCCESS and self.error is None
diff --git a/autograder/pipeline.py b/autograder/pipeline.py
index 8859caa..18be298 100644
--- a/autograder/pipeline.py
+++ b/autograder/pipeline.py
@@ -1,6 +1,6 @@
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.abstract.step import Step
-from autograder.models.dataclass.step_result import StepResult
+from autograder.models.dataclass.step_result import StepResult, StepStatus
 
 
 class AutograderPipeline:
@@ -8,22 +8,23 @@ def __init__(self):
         self._steps = []
 
     def add_step(self, step: Step) -> None:
-        self._filters.append(step)
+        self._steps.append(step)
 
     def run(self, input_data):
-        result = StepResult(data=input_data, original_input=input_data) #Initialize result object with input data
+        result = StepResult(data=input_data, status=StepStatus.SUCCESS, original_input=input_data) #Initialize result object with input data
 
         for step in self._steps:
             if not result.is_successful:
                 break
             try:
-                result.data = step.execute(result.data)
+                result = step.execute(result.data)
             except Exception as e:
                 result.error = str(e)
+                result.status = StepStatus.FAIL
                 result.failed_at_step = step.__class__.__name__
 
         if not result.is_successful:
-            return GradingResult(
+            return GradingResult( #Maybe return a ErrorResponse object?
                 final_score=0.0,
                 status="error",
                 feedback=None,
@@ -32,5 +33,7 @@ def run(self, input_data):
                 failed_at_step=result.failed_at_step,
             )
         else:
-            return result.data # Assuming the final step returns a GradingResult (Which is bad)
+            return result.data # Assuming the final step returns a GradingResult
+
+
 
diff --git a/autograder/services/pre_flight_service.py b/autograder/services/pre_flight_service.py
index 9beef8f..1e89219 100644
--- a/autograder/services/pre_flight_service.py
+++ b/autograder/services/pre_flight_service.py
@@ -1,38 +1,80 @@
 import logging
-from autograder.context import request_context
+from typing import List
+from autograder.models.dataclass.preflight_error import PreflightError, PreflightCheckType
+
+
 class PreFlightService:
-    def __init__(self,required_files=None,setup_commands=None):
-        self.required_files = required_files if required_files else []
-        self.setup_commands = setup_commands if setup_commands else []
-        self.fatal_errors = []
+    def __init__(self, setup_config):
+        self.required_files = setup_config.get('required_files', [])
+        self.setup_commands = setup_config.get('setup_commands', [])
+        self.fatal_errors: List[PreflightError] = []
         self.logger = logging.getLogger("PreFlight")
 
-    def check_required_files(self):
+    def check_required_files(self, submission_files) -> bool:
         """
         Checks for the existence of required files in the submission.
+        Returns True if all required files exist, False otherwise.
         """
-        request = request_context.get_request()
-        submission_files = request.submission_files
         self.logger.debug("Checking required files")
+
+        if not self.required_files:
+            self.logger.debug("No required files to check")
+            return True
+
         for file in self.required_files:
             if file not in submission_files:
                 error_msg = f"**Erro:** Arquivo ou diretório obrigatório não encontrado: `'{file}'`"
                 self.logger.error(error_msg)
-                self.fatal_errors.append({"type": "file_check", "message": error_msg})
+                self.fatal_errors.append(PreflightError(
+                    type=PreflightCheckType.FILE_CHECK,
+                    message=error_msg,
+                    details={"missing_file": file}
+                ))
 
-    @classmethod
-    def run(cls):
+        # Return True only if no file check errors were added
+        file_check_errors = [e for e in self.fatal_errors if e.type == PreflightCheckType.FILE_CHECK]
+        return len(file_check_errors) == 0
+
+    def check_setup_commands(self) -> bool:
         """
-        Creates a PreFlight instance and runs the pre-flight checks.
+        Executes setup commands in a sandbox environment.
+        Returns True if all commands succeed, False otherwise.
+
+        TODO: Implement sandbox container creation and command execution.
+        Note: Should validate that sandbox is required if setup_commands are present.
         """
-        request = request_context.get_request()
-        setup_dict = request.assignment_config.setup
-        preflight = cls(
-            required_files=setup_dict.get('file_checks', []),
-            setup_commands=setup_dict.get('commands', [])
-        )
-        preflight.check_required_files()
-        # Future: Add command execution logic here if needed
-        return preflight.fatal_errors
+        self.logger.debug("Checking setup commands")
+
+        if not self.setup_commands:
+            self.logger.debug("No setup commands to execute")
+            return True
+
+        # TODO: Implement actual setup command execution
+        # This should:
+        # 1. Create sandbox container
+        # 2. Execute each command
+        # 3. Check exit codes
+        # 4. Append PreflightError if any command fails
+
+        return True
+
+    def has_errors(self) -> bool:
+        """Check if any fatal errors were found during preflight checks."""
+        return len(self.fatal_errors) > 0
+
+    def get_error_messages(self) -> List[str]:
+        """Get all error messages as a list of strings."""
+        return [error.message for error in self.fatal_errors]
+
+
+"""
+Setup commands here is a problem. 
+The pre-flight service should be responsible for also creating the sandbox container
+and executing the setup commmands, so that if one of them fails, the pipeline already stops
+However, it's important to check if there's really a need for creating a sandbox. 
+Maybe add a config validation step before the pipeline starts?
+Example: If someone sets setup commands but the template does not require a sandbox,
+it should raise a configuration error before starting the pipeline.
+"""
 
 
diff --git a/autograder/steps/pre_flight_step.py b/autograder/steps/pre_flight_step.py
index 08d27cc..76f0785 100644
--- a/autograder/steps/pre_flight_step.py
+++ b/autograder/steps/pre_flight_step.py
@@ -1,10 +1,70 @@
 from autograder.models.abstract.step import Step
+from autograder.models.dataclass.step_result import StepResult, StepStatus
 from autograder.services.pre_flight_service import PreFlightService
 
 
 class PreFlightStep(Step):
+    """
+    Pre-flight check step that validates submission before grading begins.
+
+    Checks are run in order:
+    1. Required files check
+    2. Setup commands check (only if files check passes)
+
+    If any check fails, the step returns a FAIL status with error details.
+    """
+
     def __init__(self, setup_config):
         self._setup_config = setup_config
-        self._pre_flight_service = PreFlightService
-    def execute(self, input):
-        pass
\ No newline at end of file
+        self._pre_flight_service = PreFlightService(setup_config)
+
+    def execute(self, input) -> StepResult:
+        """
+        Execute pre-flight checks on the submission.
+
+        Args:
+            input: Submission data (typically file list or submission object)
+
+        Returns:
+            StepResult with status SUCCESS if all checks pass, FAIL otherwise
+        """
+        # Check required files first
+        if self._setup_config.get('required_files'):
+            files_ok = self._pre_flight_service.check_required_files(input)
+            if not files_ok:
+                # File check failed, don't continue to setup commands
+                return StepResult(
+                    data=input,
+                    status=StepStatus.FAIL,
+                    error=self._format_errors(),
+                    failed_at_step=self.__class__.__name__,
+                    original_input=input
+                )
+
+        # Check setup commands only if file check passed
+        if self._setup_config.get('setup_commands'):
+            setup_ok = self._pre_flight_service.check_setup_commands()
+            if not setup_ok:
+                return StepResult(
+                    data=input,
+                    status=StepStatus.FAIL,
+                    error=self._format_errors(),
+                    failed_at_step=self.__class__.__name__,
+                    original_input=input
+                )
+
+        # All checks passed
+        return StepResult(
+            data=input,
+            status=StepStatus.SUCCESS,
+            original_input=input
+        )
+
+    def _format_errors(self) -> str:
+        """Format all preflight errors into a single error message."""
+        if not self._pre_flight_service.has_errors():
+            return "Unknown preflight error"
+
+        error_messages = self._pre_flight_service.get_error_messages()
+        return "\n".join(error_messages)
+
diff --git a/tests/autograder/__init__.py b/tests/autograder/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/builder/__init__.py b/tests/autograder/builder/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/builder/test_tree.py b/tests/autograder/builder/test_tree.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/core/__init__.py b/tests/autograder/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/core/reporter/__init__.py b/tests/autograder/core/reporter/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/core/reporter/test_ai_reporter.py b/tests/autograder/core/reporter/test_ai_reporter.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/core/reporter/test_default_reporter.py b/tests/autograder/core/reporter/test_default_reporter.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/core/test_grader.py b/tests/autograder/core/test_grader.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/autograder/test_facade.py b/tests/autograder/test_facade.py
new file mode 100644
index 0000000..79e428b
--- /dev/null
+++ b/tests/autograder/test_facade.py
@@ -0,0 +1,236 @@
+import unittest
+from unittest.mock import patch, Mock
+
+from autograder.pipeline.autograder_facade import Autograder
+from autograder.models.autograder_request import AutograderRequest
+from autograder.models.assignment_config import AssignmentConfig
+from autograder.models.dataclass.autograder_response import AutograderResponse
+from autograder.models.dataclass.result import Result
+
+
+class TestAutograderFacade(unittest.TestCase):
+
+    def setUp(self):
+        # Common test data
+        self.mock_submission = {"file.py": "print('hello')"}
+        self.mock_criteria = {"base": {"subjects": {"test": {"tests": ["passing_test"]}}}}
+        self.mock_feedback_prefs = {"general": {}}
+
+        self.mock_assignment_config = AssignmentConfig(
+            criteria=self.mock_criteria,
+            feedback=self.mock_feedback_prefs,
+            setup={},
+            template="web dev"
+        )
+
+        # A standard successful result from the Grader
+        self.mock_grader_result = Result(
+            final_score=85.0,
+            author="test_student",
+            submission_file=self.mock_submission,
+            base_results=[], bonus_results=[], penalty_results=[]
+        )
+
+    @patch('pipeline.autograder_facade.CriteriaTree')
+    @patch('pipeline.autograder_facade.TemplateLibrary')
+    @patch('pipeline.autograder_facade.Grader')
+    @patch('pipeline.autograder_facade.Reporter')
+    def test_grade_success_default_feedback(self, mock_reporter, mock_grader, mock_template_library,
+                                            mock_criteria_tree):
+        """A successful grading run that returns generated default feedback."""
+        # Arrange
+        # Create a fake template object with the attributes the facade expects
+        fake_template = Mock()
+        fake_template.requires_pre_executed_tree = False
+        fake_template.template_name = "web dev"
+        fake_template.stop = Mock()
+
+        mock_template_library.get_template.return_value = fake_template
+
+        fake_tree = Mock()
+        fake_tree.print_pre_executed_tree = Mock()
+        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
+
+        mock_grader.return_value.run.return_value = self.mock_grader_result
+
+        fake_reporter = Mock()
+        fake_reporter.generate_feedback.return_value = "Great job!"
+        mock_reporter.create_default_reporter.return_value = fake_reporter
+
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=self.mock_assignment_config,
+            student_name="test_student",
+            include_feedback=True,
+            feedback_mode="default"
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertIsInstance(response, AutograderResponse)
+        self.assertEqual(response.status, "Success")
+        self.assertEqual(response.final_score, 85.0)
+        self.assertEqual(response.feedback, "Great job!")
+
+        mock_template_library.get_template.assert_called_once_with("web dev")
+        mock_criteria_tree.build_non_executed_tree.assert_called_once()
+        mock_grader.return_value.run.assert_called_once()
+        mock_reporter.create_default_reporter.assert_called_once()
+
+    @patch('pipeline.autograder_facade.TemplateLibrary')
+    def test_grade_failure_invalid_template(self, mock_template_library):
+        """If TemplateLibrary returns None, the facade should fail with an informative message."""
+        # Arrange
+        mock_template_library.get_template.return_value = None
+
+        invalid_config = AssignmentConfig(
+            criteria = self.mock_criteria,
+            feedback = self.mock_feedback_prefs,
+            setup = {},
+            template="invalid template"
+        )
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=invalid_config,
+            student_name="student"
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertEqual(response.status, "fail")
+        self.assertEqual(response.final_score, 0.0)
+        self.assertIn("Unsupported template: invalid template", response.feedback)
+
+    @patch('pipeline.autograder_facade.CriteriaTree')
+    @patch('pipeline.autograder_facade.TemplateLibrary')
+    @patch('pipeline.autograder_facade.Grader')
+    def test_grade_failure_during_grading(self, mock_grader, mock_template_library, mock_criteria_tree):
+        """If the Grader raises an exception the facade should return a failure response containing the error."""
+        # Arrange
+        fake_template = Mock()
+        fake_template.requires_pre_executed_tree = False
+        fake_template.template_name = "web dev"
+        fake_template.stop = Mock()
+        mock_template_library.get_template.return_value = fake_template
+
+        fake_tree = Mock()
+        fake_tree.print_pre_executed_tree = Mock()
+        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
+
+        mock_grader.return_value.run.side_effect = Exception("Something went wrong in the grader")
+
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=self.mock_assignment_config,
+            student_name="test_student"
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertEqual(response.status, "fail")
+        self.assertEqual(response.final_score, 0.0)
+        self.assertIn("Something went wrong in the grader", response.feedback)
+
+    @patch('pipeline.autograder_facade.CriteriaTree')
+    @patch('pipeline.autograder_facade.TemplateLibrary')
+    @patch('pipeline.autograder_facade.Grader')
+    def test_grade_failure_ai_missing_credentials(self, mock_grader, mock_template_library, mock_criteria_tree):
+        """AI feedback mode without the required keys should fail with an explanatory message."""
+        # Arrange
+        fake_template = Mock()
+        fake_template.requires_pre_executed_tree = False
+        fake_template.template_name = "web dev"
+        fake_template.stop = Mock()
+        mock_template_library.get_template.return_value = fake_template
+
+        fake_tree = Mock()
+        fake_tree.print_pre_executed_tree = Mock()
+        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
+
+        mock_grader.return_value.run.return_value = self.mock_grader_result
+
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=self.mock_assignment_config,
+            student_name="test_student",
+            include_feedback=True,
+            feedback_mode="ai",
+            openai_key=None  # missing keys
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertEqual(response.status, "fail")
+        self.assertEqual(response.final_score, 0.0)
+        self.assertIn("OpenAI key, Redis URL, and Redis token are required", response.feedback)
+
+    @patch('pipeline.autograder_facade.PreFlight')
+    def test_preflight_failure_stops_processing(self, mock_preflight):
+        """If pre-flight returns impediments, grading should stop and return those messages."""
+        # Arrange
+        mock_preflight.run.return_value = [{'message': 'setup failed due to X'}]
+
+        config_with_setup = AssignmentConfig(
+             criteria=self.mock_criteria,
+             feedback=self.mock_feedback_prefs,
+             setup={'cmds': []},
+             template="web dev"
+        )
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=config_with_setup,
+            student_name="student"
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertEqual(response.status, "fail")
+        self.assertEqual(response.final_score, 0.0)
+        self.assertIn('setup failed due to X', response.feedback)
+
+    @patch('pipeline.autograder_facade.CriteriaTree')
+    @patch('pipeline.autograder_facade.TemplateLibrary')
+    @patch('pipeline.autograder_facade.Grader')
+    def test_no_feedback_requested_returns_score_only(self, mock_grader, mock_template_library, mock_criteria_tree):
+        """When include_feedback is False, the facade should return the score and an empty feedback string."""
+        # Arrange
+        fake_template = Mock()
+        fake_template.requires_pre_executed_tree = False
+        fake_template.template_name = "web dev"
+        fake_template.stop = Mock()
+        mock_template_library.get_template.return_value = fake_template
+
+        fake_tree = Mock()
+        fake_tree.print_pre_executed_tree = Mock()
+        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
+
+        mock_grader.return_value.run.return_value = self.mock_grader_result
+
+        autograder_request = AutograderRequest(
+            submission_files=self.mock_submission,
+            assignment_config=self.mock_assignment_config,
+            student_name="test_student",
+            include_feedback=False
+        )
+
+        # Act
+        response = Autograder.grade(autograder_request)
+
+        # Assert
+        self.assertEqual(response.status, "Success")
+        self.assertEqual(response.final_score, 85.0)
+        self.assertEqual(response.feedback, "")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/data/curl_examples.sh b/tests/data/curl_examples.sh
old mode 100755
new mode 100644
diff --git a/tests/data/custom_template/criteria.json b/tests/data/custom_template/criteria.json
index 4c8485c..e69de29 100644
--- a/tests/data/custom_template/criteria.json
+++ b/tests/data/custom_template/criteria.json
@@ -1,24 +0,0 @@
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "custom_tests": {
-        "weight": 100,
-        "tests": [
-          {
-            "name": "check_file_exists",
-            "calls": [
-              ["main.py"]
-            ]
-          },
-          {
-            "name": "check_function_exists",
-            "calls": [
-              ["greet"]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/tests/data/custom_template/main.py b/tests/data/custom_template/main.py
index 7c1b20f..e69de29 100644
--- a/tests/data/custom_template/main.py
+++ b/tests/data/custom_template/main.py
@@ -1,12 +0,0 @@
-def greet(name):
-    """Simple greeting function."""
-    return f"Hello, {name}!"
-
-
-def main():
-    """Main function."""
-    print(greet("World"))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/data/essay/criteria.json b/tests/data/essay/criteria.json
index 4f53368..e69de29 100644
--- a/tests/data/essay/criteria.json
+++ b/tests/data/essay/criteria.json
@@ -1,25 +0,0 @@
-{
-  "test_library": "essay",
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "foundations": {
-        "weight": 60,
-        "tests": [
-          { "file": "essay.txt", "name": "thesis_statement" },
-          { "file": "essay.txt", "name": "clarity_and_cohesion" }
-        ]
-      },
-      "prompt_adherence": {
-        "weight": 40,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "adherence_to_prompt",
-            "calls": [["Discuss the impact of AI on the workforce."]]
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/tests/data/essay/essay.txt b/tests/data/essay/essay.txt
index 99f928e..e69de29 100644
--- a/tests/data/essay/essay.txt
+++ b/tests/data/essay/essay.txt
@@ -1 +0,0 @@
-Artificial intelligence (AI) is reshaping the global economy and the modern workplace. While AI augments human capabilities and automates repetitive tasks, it also raises questions about job displacement, fairness, and the need for upskilling. To navigate this transition responsibly, educators and organizations must emphasize critical thinking, ethical use of AI, and collaboration between humans and intelligent systems. With thoughtful policy and training, AI can become a multiplier for productivity and creativity rather than a replacement for human potential.
diff --git a/tests/data/essay/feedback.json b/tests/data/essay/feedback.json
index 0bdac9a..e69de29 100644
--- a/tests/data/essay/feedback.json
+++ b/tests/data/essay/feedback.json
@@ -1,5 +0,0 @@
-{
-  "style": "concise",
-  "include_suggestions": true,
-  "tone": "supportive"
-}
diff --git a/tests/data/input_output/criteria.json b/tests/data/input_output/criteria.json
index b643bd7..e69de29 100644
--- a/tests/data/input_output/criteria.json
+++ b/tests/data/input_output/criteria.json
@@ -1,33 +0,0 @@
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "basic_operations": {
-        "weight": 50,
-        "tests": [
-          {
-            "name": "expect_output",
-            "calls": [
-              [["add", "5", "3"], "8"],
-              [["subtract", "10", "4"], "6"],
-              [["multiply", "7", "6"], "42"]
-            ]
-          }
-        ]
-      },
-      "edge_cases": {
-        "weight": 50,
-        "tests": [
-          {
-            "name": "expect_output",
-            "calls": [
-              [["add", "0", "0"], "0"],
-              [["multiply", "5", "0"], "0"],
-              [["divide", "10", "2"], "5.0"]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/tests/data/input_output/requirements.txt b/tests/data/input_output/requirements.txt
index d949add..e69de29 100644
--- a/tests/data/input_output/requirements.txt
+++ b/tests/data/input_output/requirements.txt
@@ -1 +0,0 @@
-# No external dependencies required for this simple calculator
diff --git a/tests/data/input_output/setup.json b/tests/data/input_output/setup.json
index 553ee7d..e69de29 100644
--- a/tests/data/input_output/setup.json
+++ b/tests/data/input_output/setup.json
@@ -1,7 +0,0 @@
-{
-  "runtime_image": "python:3.11-slim",
-  "start_command": "python calculator.py",
-  "commands": {
-    "install_dependencies": "pip install --no-cache-dir -r requirements.txt"
-  }
-}
diff --git a/tests/data/web_dev/style.css b/tests/data/web_dev/style.css
index 646e36a..e69de29 100644
--- a/tests/data/web_dev/style.css
+++ b/tests/data/web_dev/style.css
@@ -1,44 +0,0 @@
-.container {
-    max-width: 1200px;
-    margin: 0 auto;
-    padding: 20px;
-}
-
-.row {
-    display: flex;
-    gap: 20px;
-}
-
-h1 {
-    color: #333333;
-    font-size: 32px;
-}
-
-h2 {
-    color: #666666;
-    font-size: 24px;
-}
-
-p {
-    color: #444444;
-    font-size: 16px;
-    line-height: 1.6;
-}
-
-.card {
-    background: #f5f5f5;
-    padding: 15px;
-    border-radius: 8px;
-}
-
-nav a {
-    color: #007bff;
-    text-decoration: none;
-    margin-right: 15px;
-}
-
-footer {
-    margin-top: 40px;
-    border-top: 1px solid #ddd;
-    padding-top: 20px;
-}
diff --git a/tests/playroom/run_all_playrooms.py b/tests/playroom/run_all_playrooms.py
index 17f66bb..e69de29 100644
--- a/tests/playroom/run_all_playrooms.py
+++ b/tests/playroom/run_all_playrooms.py
@@ -1,160 +0,0 @@
-"""
-Run All Playrooms
-
-This script allows you to run all playrooms or individual ones for testing purposes.
-
-Usage:
-    python -m tests.playroom.run_all_playrooms              # Run all playrooms
-    python -m tests.playroom.run_all_playrooms webdev       # Run only webdev playroom
-    python -m tests.playroom.run_all_playrooms api essay    # Run multiple playrooms
-    python -m tests.playroom.run_all_playrooms --list       # List available playrooms
-"""
-
-import sys
-import argparse
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-
-# Import all playroom functions
-from tests.playroom.webdev_playroom import run_webdev_playroom
-from tests.playroom.api_playroom import run_api_playroom
-from tests.playroom.essay_playroom import run_essay_playroom
-from tests.playroom.io_playroom import run_io_playroom
-
-
-# Map of playroom names to their runner functions
-PLAYROOMS = {
-    "webdev": {
-        "name": "Web Development",
-        "runner": run_webdev_playroom,
-        "description": "Tests HTML/CSS grading with Bootstrap and custom classes"
-    },
-    "api": {
-        "name": "API Testing",
-        "runner": run_api_playroom,
-        "description": "Tests REST API endpoints in a Docker container"
-    },
-    "essay": {
-        "name": "Essay Grading",
-        "runner": run_essay_playroom,
-        "description": "Tests AI-powered essay grading (requires OpenAI API key)"
-    },
-    "io": {
-        "name": "Input/Output",
-        "runner": run_io_playroom,
-        "description": "Tests command-line programs with stdin/stdout validation"
-    }
-}
-
-
-def list_playrooms():
-    """Display all available playrooms."""
-    print("\n" + "="*70)
-    print("AVAILABLE PLAYROOMS")
-    print("="*70 + "\n")
-
-    for key, info in PLAYROOMS.items():
-        print(f"  {key:10} - {info['name']}")
-        print(f"             {info['description']}")
-        print()
-
-    print("="*70 + "\n")
-
-
-def run_playroom(playroom_key: str):
-    """Run a specific playroom by its key."""
-    if playroom_key not in PLAYROOMS:
-        print(f"❌ Error: Unknown playroom '{playroom_key}'")
-        print(f"   Available playrooms: {', '.join(PLAYROOMS.keys())}")
-        return False
-
-    try:
-        PLAYROOMS[playroom_key]["runner"]()
-        return True
-    except Exception as e:
-        print(f"\n❌ Error running {playroom_key} playroom: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def run_all():
-    """Run all playrooms sequentially."""
-    print("\n" + "#"*70)
-    print("# RUNNING ALL PLAYROOMS")
-    print("#"*70 + "\n")
-
-    results = {}
-    for key in PLAYROOMS.keys():
-        success = run_playroom(key)
-        results[key] = success
-        print("\n" + "-"*70 + "\n")
-
-    # Summary
-    print("\n" + "="*70)
-    print("SUMMARY")
-    print("="*70 + "\n")
-
-    for key, success in results.items():
-        status = "✅ SUCCESS" if success else "❌ FAILED"
-        print(f"  {PLAYROOMS[key]['name']:20} - {status}")
-
-    total = len(results)
-    passed = sum(1 for s in results.values() if s)
-    print(f"\n  Total: {passed}/{total} playrooms completed successfully")
-    print("\n" + "="*70 + "\n")
-
-
-def main():
-    """Main entry point for the playroom runner."""
-    parser = argparse.ArgumentParser(
-        description="Run autograder playrooms for testing",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  python -m tests.playroom.run_all_playrooms              # Run all playrooms
-  python -m tests.playroom.run_all_playrooms webdev       # Run only webdev
-  python -m tests.playroom.run_all_playrooms api essay    # Run multiple
-  python -m tests.playroom.run_all_playrooms --list       # List available
-        """
-    )
-
-    parser.add_argument(
-        'playrooms',
-        nargs='*',
-        help='Specific playrooms to run (e.g., webdev api). If none specified, runs all.'
-    )
-
-    parser.add_argument(
-        '--list',
-        action='store_true',
-        help='List all available playrooms'
-    )
-
-    args = parser.parse_args()
-
-    # Handle --list flag
-    if args.list:
-        list_playrooms()
-        return
-
-    # If no playrooms specified, run all
-    if not args.playrooms:
-        run_all()
-        return
-
-    # Run specified playrooms
-    print(f"\n🎮 Running {len(args.playrooms)} playroom(s)...\n")
-    for playroom_key in args.playrooms:
-        run_playroom(playroom_key)
-        if len(args.playrooms) > 1:
-            print("\n" + "-"*70 + "\n")
-
-
-if __name__ == "__main__":
-    main()
-
diff --git a/tests/unit/test_preflight_step.py b/tests/unit/test_preflight_step.py
new file mode 100644
index 0000000..16d6b7c
--- /dev/null
+++ b/tests/unit/test_preflight_step.py
@@ -0,0 +1,87 @@
+import unittest
+from autograder.steps.pre_flight_step import PreFlightStep
+from autograder.models.dataclass.step_result import StepStatus
+from autograder.models.dataclass.preflight_error import PreflightCheckType
+
+
+class TestPreFlightStep(unittest.TestCase):
+
+    def test_no_required_files_passes(self):
+        """Test that step passes when no required files are specified"""
+        setup_config = {}
+        step = PreFlightStep(setup_config)
+
+        result = step.execute(['file1.py', 'file2.py'])
+
+        self.assertEqual(result.status, StepStatus.SUCCESS)
+        self.assertIsNone(result.error)
+        self.assertTrue(result.is_successful)
+
+    def test_required_files_all_present_passes(self):
+        """Test that step passes when all required files are present"""
+        setup_config = {
+            'required_files': ['file1.py', 'file2.py']
+        }
+        step = PreFlightStep(setup_config)
+
+        result = step.execute(['file1.py', 'file2.py', 'file3.py'])
+
+        self.assertEqual(result.status, StepStatus.SUCCESS)
+        self.assertIsNone(result.error)
+        self.assertTrue(result.is_successful)
+
+    def test_required_files_missing_fails(self):
+        """Test that step fails when required files are missing"""
+        setup_config = {
+            'required_files': ['file1.py', 'file2.py']
+        }
+        step = PreFlightStep(setup_config)
+
+        result = step.execute(['file1.py'])  # file2.py is missing
+
+        self.assertEqual(result.status, StepStatus.FAIL)
+        self.assertIsNotNone(result.error)
+        self.assertFalse(result.is_successful)
+        self.assertIn('file2.py', result.error)
+        self.assertEqual(result.failed_at_step, 'PreFlightStep')
+
+    def test_multiple_missing_files_all_reported(self):
+        """Test that all missing files are reported in the error"""
+        setup_config = {
+            'required_files': ['file1.py', 'file2.py', 'file3.py']
+        }
+        step = PreFlightStep(setup_config)
+
+        result = step.execute(['file1.py'])  # file2.py and file3.py are missing
+
+        self.assertEqual(result.status, StepStatus.FAIL)
+        self.assertIn('file2.py', result.error)
+        self.assertIn('file3.py', result.error)
+
+    def test_setup_commands_not_run_when_file_check_fails(self):
+        """Test that setup commands are not checked if file check fails"""
+        setup_config = {
+            'required_files': ['missing.py'],
+            'setup_commands': ['npm install']
+        }
+        step = PreFlightStep(setup_config)
+
+        result = step.execute(['other.py'])
+
+        # Should fail on file check, not even attempt setup commands
+        self.assertEqual(result.status, StepStatus.FAIL)
+        self.assertIn('missing.py', result.error)
+
+        # Verify only file check errors are present
+        file_check_errors = [e for e in step._pre_flight_service.fatal_errors
+                           if e.type == PreflightCheckType.FILE_CHECK]
+        setup_errors = [e for e in step._pre_flight_service.fatal_errors
+                       if e.type == PreflightCheckType.SETUP_COMMAND]
+
+        self.assertGreater(len(file_check_errors), 0)
+        self.assertEqual(len(setup_errors), 0)
+
+
+if __name__ == '__main__':
+    unittest.main()
+

From 6367028e07b11768c79425012a79d70f78fbc4c6 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Fri, 26 Dec 2025 18:31:07 -0300
Subject: [PATCH 03/49] feat: criteria tree printer

---
 autograder/models/criteria_tree.py           | 161 +++++--------------
 autograder/utils/formatters/__init__.py      |   0
 autograder/utils/formatters/criteria_tree.py |  45 ++++++
 autograder/utils/printers/__init__.py        |   0
 autograder/utils/printers/criteria_tree.py   |  45 ++++++
 autograder/utils/processers/__init__.py      |   0
 autograder/utils/processers/criteria_tree.py |  19 +++
 7 files changed, 152 insertions(+), 118 deletions(-)
 create mode 100644 autograder/utils/formatters/__init__.py
 create mode 100644 autograder/utils/formatters/criteria_tree.py
 create mode 100644 autograder/utils/printers/__init__.py
 create mode 100644 autograder/utils/printers/criteria_tree.py
 create mode 100644 autograder/utils/processers/__init__.py
 create mode 100644 autograder/utils/processers/criteria_tree.py

diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py
index af4ec4e..035c6eb 100644
--- a/autograder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -1,39 +1,36 @@
 from typing import List, Any
 from autograder.models.dataclass.test_result import TestResult
+from autograder.utils.formatters.criteria_tree import PreExecutedTreeFormatter
+from autograder.utils.printers.criteria_tree import CriteriaTreePrinter
 
 
-# Assuming TestResult is defined in a separate, importable file
-# from autograder.core.models.test_result import TestResult
-
-# ===============================================================
-# 1. Classes for Test Execution
-# ===============================================================
 class TestCall:
     """Represents a single invocation of a test function with its arguments."""
+
     def __init__(self, args: List[Any]):
         self.args = args
 
     def __repr__(self):
         return f"TestCall(args={self.args})"
 
-# ===============================================================
-# 2. Classes for the Tree Structure
-# ===============================================================
 
 class Test:
     """
     Represents a group of calls to a single test function in the library.
     This is a LEAF node in the grading tree.
     """
-    def __init__(self, name: str, filename: str = None):
-        self.name = name
-        self.file = filename  # The file this test operates on (e.g., "index.html")
+
+    def __init__(self, name: str, filename: str | None = None):
+        self.name: str = name
+        self.file: str | None = filename
         self.calls: List[TestCall] = []
 
     def add_call(self, call: TestCall):
         self.calls.append(call)
 
-    def get_result(self, test_library, submission_files, subject_name: str) -> List[TestResult]:
+    def get_result(
+        self, test_library, submission_files, subject_name: str
+    ) -> List[TestResult]:
         """
         Retrieves a TestFunction object from the library and executes it for each TestCall.
         """
@@ -51,7 +48,14 @@ def get_result(self, test_library, submission_files, subject_name: str) -> List[
             else:
                 file_content_to_pass = submission_files.get(self.file)
                 if file_content_to_pass is None:
-                    return [TestResult(self.name, 0, f"Erro: O arquivo necessário '{self.file}' não foi encontrado na submissão.", subject_name)]
+                    return [
+                        TestResult(
+                            self.name,
+                            0,
+                            f"Erro: O arquivo necessário '{self.file}' não foi encontrado na submissão.",
+                            subject_name,
+                        )
+                    ]
 
         # --- Execution Logic ---
         if not self.calls:
@@ -67,7 +71,9 @@ def get_result(self, test_library, submission_files, subject_name: str) -> List[
         for call in self.calls:
             # Execute the 'execute' method of the TestFunction instance
             if file_content_to_pass:
-                result = test_function_instance.execute(file_content_to_pass, *call.args)
+                result = test_function_instance.execute(
+                    file_content_to_pass, *call.args
+                )
             else:
                 result = test_function_instance.execute(*call.args)
             result.subject_name = subject_name
@@ -77,146 +83,65 @@ def get_result(self, test_library, submission_files, subject_name: str) -> List[
     def __repr__(self):
         return f"Test(name='{self.name}', file='{self.file}', calls={len(self.calls)})"
 
+
 class Subject:
     """
-    Represents a subject, which can contain EITHER a list of tests OR
-    a dictionary of nested subjects. This is a BRANCH or LEAF-HOLDER node.
+    Represents a subject, which can contain a list of tests AND/OR
+    a list of nested subjects. This is a BRANCH and/or LEAF-HOLDER node.
     """
+
     def __init__(self, name, weight=0):
         self.name = name
         self.weight = weight
-        self.tests: List[Test] | None = None
-        self.subjects: dict[str, 'Subject'] | None = None
+        self.tests: List[Test] = list()
+        self.subjects: List[Subject] = list()
 
     def __repr__(self):
-        if self.subjects is not None:
-            return f"Subject(name='{self.name}', weight={self.weight}, subjects={len(self.subjects)})"
-        return f"Subject(name='{self.name}', weight={self.weight}, tests={self.tests})"
+        return f"Subject(name={self.name}, weight={self.weight}, subjects={len(self.subjects)}, tests={len(self.tests)})"
 
 
 class TestCategory:
     """
     Represents one of the three main categories: base, bonus, or penalty.
-    Can contain EITHER a list of tests OR a dictionary of subjects (not both).
+    Can contain a list of tests AND/OR a list of subjects.
     """
+
     def __init__(self, name, max_score=100):
         self.name = name
         self.max_score = max_score
-        self.subjects: dict[str, Subject] | None = None
-        self.tests: List[Test] | None = None
+        self.subjects: List[Subject] = list()
+        self.tests: List[Test] = list()
 
     def set_weight(self, weight):
         self.max_score = weight
 
     def add_subject(self, subject: Subject):
-        if self.subjects is None:
-            self.subjects = {}
-        self.subjects[subject.name] = subject
+        self.subjects.append(subject)
+
+    def add_subjects(self, subjects: List[Subject]) -> None:
+        self.subjects.extend(subjects)
 
     def __repr__(self):
-        if self.tests is not None:
-            return f"TestCategory(name='{self.name}', max_score={self.max_score}, tests={len(self.tests)})"
-        return f"TestCategory(name='{self.name}', max_score={self.max_score}, subjects={list(self.subjects.keys()) if self.subjects else []})"
+        return f"TestCategory(name='{self.name}', max_score={self.max_score}, subjects={len(self.subjects)}, tests={len(self.tests)})"
 
 
 class CriteriaTree:
     """The ROOT of the criteria tree."""
+
     def __init__(self, bonus_weight=0, penalty_weight=0):
         self.base = TestCategory("base")
         self.bonus = TestCategory("bonus", max_score=bonus_weight)
         self.penalty = TestCategory("penalty", max_score=penalty_weight)
 
     def __repr__(self):
-        return f"Criteria(categories=['base', 'bonus', 'penalty'])"
+        return "Criteria(categories=['base', 'bonus', 'penalty'])"
 
     def print_tree(self):
         """Prints a visual representation of the entire criteria tree."""
-        print(f"🌲 Criteria Tree")
-        self._print_category(self.base, prefix="  ")
-        self._print_category(self.bonus, prefix="  ")
-        self._print_category(self.penalty, prefix="  ")
-
-    def _print_category(self, category: TestCategory, prefix: str):
-        """Helper method to print a category and its subjects or tests."""
-        if not category.subjects and not category.tests:
-            return
-        print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})")
-        
-        if category.subjects:
-            for subject in category.subjects.values():
-                self._print_subject(subject, prefix=prefix + "    ")
-        
-        if category.tests:
-            for test in category.tests:
-                print(f"{prefix}    - 🧪 {test.name} (file: {test.file})")
-                for call in test.calls:
-                    print(f"{prefix}      - Parameters: {call.args}")
-
-    def _print_subject(self, subject: Subject, prefix: str):
-        """Recursive helper method to print a subject and its contents."""
-        print(f"{prefix}📘 {subject.name} (weight: {subject.weight})")
-
-        if subject.subjects is not None:
-            for sub in subject.subjects.values():
-                self._print_subject(sub, prefix=prefix + "    ")
-
-        if subject.tests is not None:
-            for test in subject.tests:
-                print(f"{prefix}  - 🧪 {test.name} (file: {test.file})")
-                for call in test.calls:
-                    print(f"{prefix}    - Parameters: {call.args}")
+        printer = CriteriaTreePrinter()
+        printer.print_tree(self)
 
     def print_pre_executed_tree(self):
         """Prints a visual representation of the entire pre-executed criteria tree."""
-        print(f"🌲 Pre-Executed Criteria Tree")
-        self._print_pre_executed_category(self.base, prefix="  ")
-        self._print_pre_executed_category(self.bonus, prefix="  ")
-        self._print_pre_executed_category(self.penalty, prefix="  ")
-
-    def _print_pre_executed_category(self, category: TestCategory, prefix: str):
-        """Helper method to print a category and its pre-executed subjects or tests."""
-        if not category.subjects and not category.tests:
-            return
-        print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})")
-        
-        if category.subjects:
-            for subject in category.subjects.values():
-                self._print_pre_executed_subject(subject, prefix=prefix + "    ")
-        
-        if category.tests:
-            # In a pre-executed tree, category.tests contains TestResult objects
-            for result in category.tests:
-                if isinstance(result, TestResult):
-                    params_str = f" (Parameters: {result.parameters})" if result.parameters else ""
-                    print(f"{prefix}    - 📝 {result.test_name}{params_str} -> Score: {result.score}")
-                else:
-                    print(f"{prefix}    - ? Unexpected item in tests list: {result}")
-
-    def _print_pre_executed_subject(self, subject: Subject, prefix: str):
-        """Recursive helper method to print a subject and its pre-executed test results."""
-        print(f"{prefix}📘 {subject.name} (weight: {subject.weight})")
-
-        if subject.subjects is not None:
-            for sub in subject.subjects.values():
-                self._print_pre_executed_subject(sub, prefix=prefix + "    ")
-
-        if subject.tests is not None:
-            # In a pre-executed tree, subject.tests contains TestResult objects
-
-            # In the regular tree, subject.tests contains "Test" objects
-            for result in subject.tests:
-                if isinstance(result, TestResult):
-                    params_str = f" (Parameters: {result.parameters})" if result.parameters else ""
-                    print(f"{prefix}  - 📝 {result.test_name}{params_str} -> Score: {result.score}")
-
-                elif isinstance(result, Test):
-                    print(f"{prefix} - 🧪 {result.name} (file: {result.file})")
-                    """Added the symbol identificator to match the previous formatting"""
-                    for call in result.calls:
-                        print(f"{prefix}    - Parameters: {call.args}")
-                else:
-                    # Fallback for unexpected types
-                    print(f"{prefix}  - ? Unexpected item in tests list: {result}")
-
-
-
+        printer = CriteriaTreePrinter(PreExecutedTreeFormatter())
+        printer.print_tree(self)
diff --git a/autograder/utils/formatters/__init__.py b/autograder/utils/formatters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/utils/formatters/criteria_tree.py b/autograder/utils/formatters/criteria_tree.py
new file mode 100644
index 0000000..536fcb2
--- /dev/null
+++ b/autograder/utils/formatters/criteria_tree.py
@@ -0,0 +1,45 @@
+from typing import List, override
+from autograder.utils.processers.criteria_tree import CriteriaTreeProcesser
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from autograder.models.dataclass.test_result import TestResult
+    from autograder.models.criteria_tree import TestCategory, Subject, Test
+
+
+class CriteriaTreeFormatter(CriteriaTreeProcesser):
+    def header(self) -> str:
+        return "🌲 Criteria Tree"
+
+    @override
+    def process_test(self, test: "Test") -> List[str]:
+        result: List[str] = list()
+        result.append(f"  🧪 {test.name} (file: {test.file})")
+        for call in test.calls:
+            result.append(f"    - Parameters: {call.args}")
+        return result
+
+    @override
+    def process_subject(self, subject: "Subject") -> str:
+        return f"📘{subject.name} (weight: {subject.weight})"
+
+    @override
+    def process_category(self, category: "TestCategory") -> str:
+        return f"  📁 {category.name.upper()} (max_score: {category.max_score})"
+
+
+class PreExecutedTreeFormatter(CriteriaTreeFormatter):
+    @override
+    def header(self) -> str:
+        return "🌲 Pre-Executed Criteria Tree"
+
+    @override
+    def process_test(self, test: "Test | TestResult") -> List[str]:
+        if isinstance(test, TestResult):
+            if test.parameters:
+                params = f" (Parameters: {test.parameters})"
+            else:
+                params = ""
+            return [f"  - 📝 {test.test_name}{params} -> Score: {test.score}"]
+
+        return super().process_test(test)
diff --git a/autograder/utils/printers/__init__.py b/autograder/utils/printers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/utils/printers/criteria_tree.py b/autograder/utils/printers/criteria_tree.py
new file mode 100644
index 0000000..236e475
--- /dev/null
+++ b/autograder/utils/printers/criteria_tree.py
@@ -0,0 +1,45 @@
+from typing import TYPE_CHECKING
+from autograder.utils.formatters.criteria_tree import CriteriaTreeFormatter
+
+if TYPE_CHECKING:
+    from autograder.models.criteria_tree import CriteriaTree, TestCategory, Subject
+
+
+class CriteriaTreePrinter:
+    def __init__(self, formatter: CriteriaTreeFormatter | None = None) -> None:
+        self.__depth = 0
+        self.__formatter = CriteriaTreeFormatter() if formatter is None else formatter
+
+    def __increase_depth(self) -> None:
+        self.__depth += 1
+
+    def __decrease_depth(self) -> None:
+        self.__depth -= 1
+
+    def __print_with_depth(self, formatted: str) -> None:
+        print(f"{'    ' * self.__depth}{formatted}")
+
+    def __print_children(self, parent: "TestCategory | Subject") -> None:
+        for subject in parent.subjects:
+            self.print_subject(subject)
+
+        for test in parent.tests:
+            lines = self.__formatter.process_test(test)
+            for line in lines:
+                self.__print_with_depth(line)
+
+    def print_subject(self, subject: "Subject") -> None:
+        self.__increase_depth()
+        self.__print_with_depth(self.__formatter.process_subject(subject))
+        self.__print_children(subject)
+        self.__decrease_depth()
+
+    def print_category(self, category: "TestCategory") -> None:
+        self.__print_with_depth(self.__formatter.process_category(category))
+        self.__print_children(category)
+
+    def print_tree(self, tree: "CriteriaTree") -> None:
+        self.__print_with_depth(self.__formatter.header())
+        self.print_category(tree.base)
+        self.print_category(tree.bonus)
+        self.print_category(tree.penalty)
diff --git a/autograder/utils/processers/__init__.py b/autograder/utils/processers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/utils/processers/criteria_tree.py b/autograder/utils/processers/criteria_tree.py
new file mode 100644
index 0000000..6e7e69f
--- /dev/null
+++ b/autograder/utils/processers/criteria_tree.py
@@ -0,0 +1,19 @@
+from abc import ABC, abstractmethod
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from autograder.models.criteria_tree import TestCategory, Subject, Test
+
+
+class CriteriaTreeProcesser(ABC):
+    @abstractmethod
+    def process_subject(self, subject: "Subject") -> Any:
+        pass
+
+    @abstractmethod
+    def process_test(self, test: "Test") -> Any:
+        pass
+
+    @abstractmethod
+    def process_category(self, category: "TestCategory") -> Any:
+        pass

From ef0463f8778baf1683ce1ee684813a224b0173de Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Fri, 26 Dec 2025 18:31:34 -0300
Subject: [PATCH 04/49] feat: criteria tree parser

---
 autograder/models/config/__init__.py         |   0
 autograder/models/config/criteria.py         |  10 +
 autograder/models/config/subject.py          |  30 ++
 autograder/models/config/test.py             |   8 +
 autograder/parsers/__init__.py               |   0
 autograder/parsers/criteria_tree.py          | 122 +++++++
 autograder/services/criteria_tree_service.py | 335 +++++--------------
 requirements.txt                             |   4 +-
 8 files changed, 257 insertions(+), 252 deletions(-)
 create mode 100644 autograder/models/config/__init__.py
 create mode 100644 autograder/models/config/criteria.py
 create mode 100644 autograder/models/config/subject.py
 create mode 100644 autograder/models/config/test.py
 create mode 100644 autograder/parsers/__init__.py
 create mode 100644 autograder/parsers/criteria_tree.py

diff --git a/autograder/models/config/__init__.py b/autograder/models/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/models/config/criteria.py b/autograder/models/config/criteria.py
new file mode 100644
index 0000000..7c8366e
--- /dev/null
+++ b/autograder/models/config/criteria.py
@@ -0,0 +1,10 @@
+from typing import Optional
+from .subject import SubjectConfig
+from pydantic import BaseModel
+
+
+class CriteriaConfig(BaseModel):
+    test_library: str
+    base: SubjectConfig
+    bonus: Optional[SubjectConfig] = None
+    penalty: Optional[SubjectConfig] = None
diff --git a/autograder/models/config/subject.py b/autograder/models/config/subject.py
new file mode 100644
index 0000000..74cb14d
--- /dev/null
+++ b/autograder/models/config/subject.py
@@ -0,0 +1,30 @@
+from typing import Dict, List, Optional
+from .test import TestConfig
+from pydantic import BaseModel, model_validator
+
+
+class SubjectConfig(BaseModel):
+    weight: Optional[int] = None
+    subjects: Optional[Dict[str, "SubjectConfig"]] = None
+    tests: Optional[List[TestConfig | str]] = None
+    subjects_weight: Optional[int] = None
+
+    @model_validator(mode="after")
+    def check_subjects_and_tests(self) -> "SubjectConfig":
+        if self.subjects is None and self.tests is None:
+            raise ValueError(
+                "You needs to defined at least one of: 'subjects' or 'tests'"
+            )
+
+        if self.subjects and self.tests:
+            if self.subjects_weight is None:
+                raise ValueError(
+                    "When defining both subjects and tests, you need to define subjects_weight"
+                )
+
+            if self.subjects_weight <= 0 or self.subjects_weight >= 100:
+                raise ValueError(
+                    "subjects_weight needs to be in exclusive range ]0,100["
+                )
+
+        return self
diff --git a/autograder/models/config/test.py b/autograder/models/config/test.py
new file mode 100644
index 0000000..a8f27e7
--- /dev/null
+++ b/autograder/models/config/test.py
@@ -0,0 +1,8 @@
+from typing import List, Optional
+from pydantic import BaseModel
+
+
+class TestConfig(BaseModel):
+    name: str
+    file: str
+    calls: Optional[List[List[str]]] = None
diff --git a/autograder/parsers/__init__.py b/autograder/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/parsers/criteria_tree.py b/autograder/parsers/criteria_tree.py
new file mode 100644
index 0000000..42df39a
--- /dev/null
+++ b/autograder/parsers/criteria_tree.py
@@ -0,0 +1,122 @@
+from typing import Any, Dict, List, Optional, override
+
+from autograder.models.abstract.template import Template
+from autograder.models.config.criteria import CriteriaConfig
+from autograder.models.config.subject import SubjectConfig
+from autograder.models.config.test import TestConfig
+from autograder.models.criteria_tree import (
+    CriteriaTree,
+    Subject,
+    Test,
+    TestCall,
+    TestCategory,
+)
+from autograder.models.dataclass.test_result import TestResult
+
+
+class CriteriaTreeParser:
+    def __parse_subjects(self, configs: Dict[str, SubjectConfig]) -> List[Subject]:
+        subjects = [
+            self.__parse_subject(s_name, s_data) for s_name, s_data in configs.items()
+        ]
+        self.__balance_subject_weights(subjects)
+        return subjects
+
+    def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
+        subject = Subject(name)
+        if config.weight:
+            subject.weight = config.weight
+
+        if config.subjects:
+            subject.subjects = self.__parse_subjects(config.subjects)
+
+        if config.tests:
+            subject.tests = self.__parse_tests(config.tests)
+
+        return subject
+
+    def __balance_subject_weights(self, subjects: List[Subject]) -> None:
+        total_weight = sum(s.weight for s in subjects)
+        if total_weight > 0 and total_weight != 100:
+            scaling_factor = 100 / total_weight
+            for subject in subjects:
+                subject.weight = round(subject.weight * scaling_factor)
+
+    def __parse_tests(self, tests_data: List[TestConfig | str]) -> List[Test]:
+        return [self.__parse_test(test_item) for test_item in tests_data]
+
+    def __parse_test(self, test_item: TestConfig | str) -> Test:
+        if isinstance(test_item, str):
+            test_name = test_item
+            test_file = None
+            calls = None
+        elif isinstance(test_item, TestConfig):
+            test_name = test_item.name
+            test_file = test_item.file
+            calls = test_item.calls
+
+        test = Test(test_name, test_file)
+        if calls is not None:
+            for call_args in calls:
+                test.add_call(TestCall(call_args))
+        else:
+            test.add_call(TestCall([]))
+
+        return test
+
+    def __parse_category(self, category_name, config: SubjectConfig) -> TestCategory:
+        category = TestCategory(category_name)
+
+        if config.weight:
+            category.max_score = config.weight
+
+        if config.subjects:
+            category.add_subjects(self.__parse_subjects(config.subjects))
+
+        if config.tests:
+            category.tests = self.__parse_tests(config.tests)
+
+        return category
+
+    def parse_tree(self, tree_data: Dict[str, Any]) -> CriteriaTree:
+        tree = CriteriaTree()
+        config = CriteriaConfig(**tree_data)
+
+        for category_name in ["base", "bonus", "penalty"]:
+            category_data = getattr(config, category_name)
+            if category_data is None:
+                continue
+            parsed_category = self.__parse_category(category_name, category_data)
+            setattr(tree, category_name, parsed_category)
+
+        return tree
+
+
+class PreExecutedTreeParser(CriteriaTreeParser):
+    def __init__(self, template: Template, submission_files: Dict[str, str]) -> None:
+        self.__template: Template = template
+        self.__submission_files = submission_files
+        self.__current_subject_name: Optional[str] = None
+
+    @override
+    def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
+        self.__current_subject_name = name
+        subject = super().__parse_subject(name, config)
+        self.__current_subject_name = None
+        return subject
+
+    @override
+    def __parse_tests(self, tests_data: List[TestConfig | str]) -> List[TestResult]:
+        subject_name = self.__current_subject_name
+        if subject_name is None:
+            raise ValueError(
+                "Failed to get subject_name during pre executed tree parsing"
+            )
+        tests = super().__parse_tests(tests_data)
+        result = []
+        for test in tests:
+            executed_tests = test.get_result(
+                self.__template, self.__submission_files, subject_name
+            )
+            result.extend(executed_tests)
+        return result
diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py
index f4d5282..5c6de63 100644
--- a/autograder/services/criteria_tree_service.py
+++ b/autograder/services/criteria_tree_service.py
@@ -1,271 +1,106 @@
-from typing import List, Dict, Any
+from autograder.models.abstract.template import Template
+from autograder.models.criteria_tree import CriteriaTree
+from autograder.parsers.criteria_tree import CriteriaTreeParser, PreExecutedTreeParser
 
-from autograder.builder.models.criteria_tree import Criteria, Subject, Test, TestCall, TestResult
-from autograder.builder.models.template import Template
-from autograder.context import request_context
 
 class CriteriaTreeService:
     """A factory for creating a Criteria object from a configuration dictionary."""
-    @staticmethod
-    def build_pre_executed_tree(template: Template) -> Criteria:
-        """ Builds a Criteria tree and pre-executes all tests, having leaves as TestResult objects."""
 
+    @staticmethod
+    def build_pre_executed_tree(template: Template) -> CriteriaTree:
+        """Builds a Criteria tree and pre-executes all tests, having leaves as TestResult objects."""
         request = request_context.get_request()
         config_dict = request.assignment_config.criteria
         submission_files = request.submission_files
-        criteria = Criteria()
-
-        for category_name in ["base", "bonus", "penalty"]:
-            if category_name in config_dict:
-                category = getattr(criteria, category_name)
-                category_data = config_dict[category_name]
-
-                if "weight" in category_data:
-                    category.max_score = category_data.get("weight", 100)
-
-                # Validate that category doesn't have both subjects and tests
-                if "subjects" in category_data and "tests" in category_data:
-                    raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.")
-
-                if "subjects" in category_data:
-                    subjects = [
-                        CriteriaTree._parse_and_execute_subject(s_name, s_data, template, submission_files)
-                        for s_name, s_data in category_data["subjects"].items()
-                    ]
-                    CriteriaTree._balance_subject_weights(subjects)
-                    for subject in subjects:
-                        category.add_subject(subject)
-                elif "tests" in category_data:
-                    # Handle tests directly at category level
-                    parsed_tests = CriteriaTree._parse_tests(category_data["tests"])
-                    executed_tests = []
-                    for test in parsed_tests:
-                        test_results = test.get_result(template, submission_files, category_name)
-                        executed_tests.extend(test_results)
-                    category.tests = executed_tests
-        return criteria
+        parser = PreExecutedTreeParser(template, submission_files)
+        return parser.parse_tree(config_dict)
 
     @staticmethod
-    def build_non_executed_tree() -> Criteria:
+    def build_tree() -> CriteriaTree:
         """Builds the entire criteria tree, including balancing subject weights."""
-        criteria = Criteria()
         request = request_context.get_request()
         config_dict = request.assignment_config.criteria
-        for category_name in ["base", "bonus", "penalty"]:
-            if category_name in config_dict:
-                category = getattr(criteria, category_name)
-                category_data = config_dict[category_name]
-
-                # Set max_score for bonus and penalty categories
-                if "weight" in category_data:
-                    category.max_score = category_data.get("weight", 100)
-
-                # Validate that category doesn't have both subjects and tests
-                if "subjects" in category_data and "tests" in category_data:
-                    raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.")
-
-                if "subjects" in category_data:
-                    subjects = [
-                        CriteriaTree._parse_subject(s_name, s_data)
-                        for s_name, s_data in category_data["subjects"].items()
-                    ]
-                    CriteriaTree._balance_subject_weights(subjects)
-                    for subject in subjects:
-                        category.add_subject(subject)
-                elif "tests" in category_data:
-                    # Handle tests directly at category level
-                    category.tests = CriteriaTree._parse_tests(category_data["tests"])
-        return criteria
-
-    @staticmethod
-    def _balance_subject_weights(subjects: List[Subject]):
-        """Balances the weights of a list of sibling subjects to sum to 100."""
-        total_weight = sum(s.weight for s in subjects)
-        if total_weight > 0 and total_weight != 100:
-            scaling_factor = 100 / total_weight
-            for subject in subjects:
-                subject.weight *= scaling_factor
-
-    @staticmethod
-    def _parse_subject(subject_name: str, subject_data: dict) -> Subject:
-        """Recursively parses a subject and balances the weights of its children."""
-        if "tests" in subject_data and "subjects" in subject_data:
-            raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.")
-
-        subject = Subject(subject_name, subject_data.get("weight", 0))
-        if "tests" in subject_data:
-            subject.tests = CriteriaTree._parse_tests(subject_data["tests"])
-        elif "subjects" in subject_data:
-            child_subjects = [
-                CriteriaTree._parse_subject(sub_name, sub_data)
-                for sub_name, sub_data in subject_data["subjects"].items()
-            ]
-            CriteriaTree._balance_subject_weights(child_subjects)
-            subject.subjects = {s.name: s for s in child_subjects}
-        else:
-            subject.tests = []
-        return subject
-
-    @staticmethod
-    def _parse_and_execute_subject(subject_name: str, subject_data: dict, template: Template, submission_files: dict) -> Subject:
-        """Recursively parses a subject, executes its tests, and balances the weights of its children."""
-        if "tests" in subject_data and "subjects" in subject_data:
-            raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.")
-
-        subject = Subject(subject_name, subject_data.get("weight", 0))
-
-        if "tests" in subject_data:
-            parsed_tests = CriteriaTree._parse_tests(subject_data["tests"])
-            executed_tests = []
-            for test in parsed_tests:
-                # The run method executes the test and returns a list of TestResult objects
-                test_results = test.get_result(template, submission_files, subject_name)
-                executed_tests.extend(test_results)
-            subject.tests = executed_tests  # Store TestResult objects instead of Test objects
-        elif "subjects" in subject_data:
-            child_subjects = [
-                CriteriaTree._parse_and_execute_subject(sub_name, sub_data, template, submission_files)
-                for sub_name, sub_data in subject_data["subjects"].items()
-            ]
-            CriteriaTree._balance_subject_weights(child_subjects)
-            subject.subjects = {s.name: s for s in child_subjects}
-        else:
-            subject.tests = []
-        return subject
-
-    @staticmethod
-    def _parse_tests(test_data: list) -> List[Test]:
-        """Parses a list of test definitions from the configuration."""
-        parsed_tests = []
-        for test_item in test_data:
-            if isinstance(test_item, str):
-                # Handle simple test names (e.g., "check_no_unclosed_tags")
-                test = Test(name=test_item)  # Default file
-                test.add_call(TestCall(args=[]))
-                parsed_tests.append(test)
-
-            elif isinstance(test_item, dict):
-                # Handle complex test definitions
-                test_name = test_item.get("name")
-                test_file = test_item.get("file")
-                if not test_name:
-                    raise ValueError(f"Test definition is missing 'name': {test_item}")
-
-                test = Test(name=test_name, filename=test_file)
-
-                if "calls" in test_item:
-                    for call_args in test_item["calls"]:
-                        test.add_call(TestCall(args=call_args))
-                else:
-                    # If no 'calls' are specified, it's a single call with no arguments
-                    test.add_call(TestCall(args=[]))
-
-                parsed_tests.append(test)
-
-        return parsed_tests
-
+        parser = CriteriaTreeParser()
+        return parser.parse_tree(config_dict)
 
 
 if __name__ == "__main__":
     criteria_json = {
-  "test_library": "essay ai grader",
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "foundations": {
-        "weight": 60,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "thesis_statement"
-          },
-          {
-            "file": "essay.txt",
-            "name": "clarity_and_cohesion"
-          },
-          {
-            "file": "essay.txt",
-            "name": "grammar_and_spelling"
-          }
-        ]
-      },
-      "prompt_adherence": {
-        "weight": 40,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "adherence_to_prompt",
-            "calls": [
-              [ "Analyze the primary causes of the Industrial Revolution and its impact on 19th-century society." ]
-            ]
-          }
-        ]
-      }
+        "test_library": "essay ai grader",
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "foundations": {
+                    "weight": 60,
+                    "tests": [
+                        {"file": "essay.txt", "name": "thesis_statement"},
+                        {"file": "essay.txt", "name": "clarity_and_cohesion"},
+                        {"file": "essay.txt", "name": "grammar_and_spelling"},
+                    ],
+                },
+                "prompt_adherence": {
+                    "weight": 40,
+                    "tests": [
+                        {
+                            "file": "essay.txt",
+                            "name": "adherence_to_prompt",
+                            "calls": [
+                                [
+                                    "Analyze the primary causes of the Industrial Revolution and its impact on 19th-century society."
+                                ]
+                            ],
+                        }
+                    ],
+                },
+            },
+        },
+        "bonus": {
+            "weight": 30,
+            "subjects": {
+                "rhetorical_skill": {
+                    "weight": 70,
+                    "tests": [
+                        {"file": "essay.txt", "name": "counterargument_handling"},
+                        {"file": "essay.txt", "name": "vocabulary_and_diction"},
+                        {"file": "essay.txt", "name": "sentence_structure_variety"},
+                    ],
+                },
+                "deeper_analysis": {
+                    "weight": 30,
+                    "tests": [
+                        {
+                            "file": "essay.txt",
+                            "name": "topic_connection",
+                            "calls": [
+                                ["technological innovation", "social inequality"]
+                            ],
+                        }
+                    ],
+                },
+            },
+        },
+        "penalty": {
+            "weight": 25,
+            "subjects": {
+                "logical_integrity": {
+                    "weight": 100,
+                    "tests": [
+                        {"file": "essay.txt", "name": "logical_fallacy_check"},
+                        {"file": "essay.txt", "name": "bias_detection"},
+                        {"file": "essay.txt", "name": "originality_and_plagiarism"},
+                    ],
+                }
+            },
+        },
     }
-  },
-  "bonus": {
-    "weight": 30,
-    "subjects": {
-      "rhetorical_skill": {
-        "weight": 70,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "counterargument_handling"
-          },
-          {
-            "file": "essay.txt",
-            "name": "vocabulary_and_diction"
-          },
-          {
-            "file": "essay.txt",
-            "name": "sentence_structure_variety"
-          }
-        ]
-      },
-      "deeper_analysis": {
-        "weight": 30,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "topic_connection",
-            "calls": [
-              [ "technological innovation", "social inequality" ]
-            ]
-          }
-        ]
-      }
-    }
-  },
-  "penalty": {
-    "weight": 25,
-    "subjects": {
-      "logical_integrity": {
-        "weight": 100,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "logical_fallacy_check"
-          },
-          {
-            "file": "essay.txt",
-            "name": "bias_detection"
-          },
-          {
-              "file": "essay.txt",
-              "name": "originality_and_plagiarism"
-          }
-        ]
-      }
-    }
-  }
-}
-    submission_files = {"essay.txt": """Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike.
+    submission_files = {
+        "essay.txt": """Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike.
 The primary benefit of AI in the workplace is its capacity to augment human potential and drive efficiency. AI-powered systems can analyze vast datasets in seconds, automating routine cognitive and manual tasks, which frees human workers to focus on more complex, creative, and strategic endeavors. For instance, in medicine, AI algorithms assist radiologists in detecting tumors with greater accuracy, while in finance, they identify fraudulent transactions far more effectively than any human team. This collaboration between human and machine not only boosts output but also creates new roles centered around AI development, ethics, and system maintenance—jobs that did not exist a decade ago.
 However, this technological advancement casts a significant shadow of disruption. The same automation that drives efficiency also leads to job displacement, particularly for roles characterized by repetitive tasks. Assembly line workers, data entry clerks, and even some paralegal roles face a high risk of obsolescence. This creates a widening skills gap, where demand for high-level technical skills soars while demand for traditional skills plummets. Without robust mechanisms for reskilling and upskilling the existing workforce, this gap threatens to exacerbate socio-economic inequality, creating a divide between those who can command AI and those who are displaced by it. There are many gramatical errors in this sentence, for testing purposes.
 The most critical challenge, therefore, is not to halt technological progress but to manage its societal impact. A multi-pronged strategy is essential. Governments and educational institutions must collaborate to reform curricula, emphasizing critical thinking, digital literacy, and lifelong learning. Furthermore, corporations have a responsibility to invest in their employees through continuous training programs. Finally, strengthening social safety nets, perhaps through concepts like Universal Basic Income (UBI) or enhanced unemployment benefits, may be necessary to support individuals as they navigate this volatile transition period.
-In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""}
-    #tree = CriteriaTree.build_pre_executed_tree(criteria_json, WebDevLibrary(), submission_files)
-    tree = CriteriaTree.build_non_executed_tree(criteria_json)
-    #tree.print_pre_executed_tree()
-    tree.print_tree()
\ No newline at end of file
+In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""
+    }
+    # tree = CriteriaTree.build_pre_executed_tree(criteria_json, WebDevLibrary(), submission_files)
+    tree = CriteriaTreeService.build_tree()
+    # tree.print_pre_executed_tree()
+    tree.print_tree()
diff --git a/requirements.txt b/requirements.txt
index 6639fa8..b856c04 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,10 +8,10 @@ openai==1.93.0
 requests~=2.32.4
 beautifulsoup4~=4.13.4
 dotenv~=0.9.9
-pydantic
+pydantic~=2.12.5
 python-dotenv~=1.1.1
 upstash-redis==1.4.0
 fastapi~=0.115.0
 uvicorn[standard]~=0.32.0
 python-multipart
-docker~=7.1.0
\ No newline at end of file
+docker~=7.1.0

From e0c296301671bbabd1ebe2344adbcbc996fa9567 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Sun, 28 Dec 2025 13:12:09 -0300
Subject: [PATCH 05/49] fix: add missing subject_weight field

---
 autograder/models/criteria_tree.py  | 11 ++++++-----
 autograder/parsers/criteria_tree.py |  3 +++
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py
index 035c6eb..67f3481 100644
--- a/autograder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -1,4 +1,4 @@
-from typing import List, Any
+from typing import List, Any, Optional
 from autograder.models.dataclass.test_result import TestResult
 from autograder.utils.formatters.criteria_tree import PreExecutedTreeFormatter
 from autograder.utils.printers.criteria_tree import CriteriaTreePrinter
@@ -20,7 +20,7 @@ class Test:
     This is a LEAF node in the grading tree.
     """
 
-    def __init__(self, name: str, filename: str | None = None):
+    def __init__(self, name: str, filename: Optional[str] = None):
         self.name: str = name
         self.file: str | None = filename
         self.calls: List[TestCall] = []
@@ -90,9 +90,10 @@ class Subject:
     a list of nested subjects. This is a BRANCH and/or LEAF-HOLDER node.
     """
 
-    def __init__(self, name, weight=0):
-        self.name = name
-        self.weight = weight
+    def __init__(self, name: str, weight: int, subjects_weight: Optional[int] = None):
+        self.name: str = name
+        self.weight: int = weight
+        self.subjects_weight: Optional[int] = subjects_weight
         self.tests: List[Test] = list()
         self.subjects: List[Subject] = list()
 
diff --git a/autograder/parsers/criteria_tree.py b/autograder/parsers/criteria_tree.py
index 42df39a..248da00 100644
--- a/autograder/parsers/criteria_tree.py
+++ b/autograder/parsers/criteria_tree.py
@@ -27,6 +27,9 @@ def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
         if config.weight:
             subject.weight = config.weight
 
+        if config.subjects_weight:
+            subject.subjects_weight = config.subjects_weight
+
         if config.subjects:
             subject.subjects = self.__parse_subjects(config.subjects)
 

From 19391e1e879d541206751ce29f2582948c931957 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sun, 28 Dec 2025 23:12:38 -0300
Subject: [PATCH 06/49] feat: first glance of pipeline architecture

---
 tests/autograder/builder/__init__.py          |   0
 tests/autograder/builder/test_tree.py         |   0
 tests/autograder/core/__init__.py             |   0
 tests/autograder/core/reporter/__init__.py    |   0
 .../core/reporter/test_ai_reporter.py         |   0
 .../core/reporter/test_default_reporter.py    |   0
 tests/autograder/core/test_grader.py          |   0
 tests/autograder/test_facade.py               | 236 ------------------
 tests/unit/builder/__init__.py                |   0
 tests/unit/builder/test_tree.py               | 165 ------------
 tests/unit/core/__init__.py                   |   0
 tests/unit/core/reporter/__init__.py          |   0
 tests/unit/core/reporter/test_ai_reporter.py  |   0
 .../core/reporter/test_default_reporter.py    | 141 -----------
 tests/unit/core/test_grader.py                | 176 -------------
 tests/unit/templates/__init__.py              |   0
 tests/unit/test_facade.py                     | 236 ------------------
 tests/unit/test_preflight_step.py             |  87 -------
 18 files changed, 1041 deletions(-)
 delete mode 100644 tests/autograder/builder/__init__.py
 delete mode 100644 tests/autograder/builder/test_tree.py
 delete mode 100644 tests/autograder/core/__init__.py
 delete mode 100644 tests/autograder/core/reporter/__init__.py
 delete mode 100644 tests/autograder/core/reporter/test_ai_reporter.py
 delete mode 100644 tests/autograder/core/reporter/test_default_reporter.py
 delete mode 100644 tests/autograder/core/test_grader.py
 delete mode 100644 tests/autograder/test_facade.py
 delete mode 100644 tests/unit/builder/__init__.py
 delete mode 100644 tests/unit/builder/test_tree.py
 delete mode 100644 tests/unit/core/__init__.py
 delete mode 100644 tests/unit/core/reporter/__init__.py
 delete mode 100644 tests/unit/core/reporter/test_ai_reporter.py
 delete mode 100644 tests/unit/core/reporter/test_default_reporter.py
 delete mode 100644 tests/unit/core/test_grader.py
 delete mode 100644 tests/unit/templates/__init__.py
 delete mode 100644 tests/unit/test_facade.py
 delete mode 100644 tests/unit/test_preflight_step.py

diff --git a/tests/autograder/builder/__init__.py b/tests/autograder/builder/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/builder/test_tree.py b/tests/autograder/builder/test_tree.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/core/__init__.py b/tests/autograder/core/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/core/reporter/__init__.py b/tests/autograder/core/reporter/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/core/reporter/test_ai_reporter.py b/tests/autograder/core/reporter/test_ai_reporter.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/core/reporter/test_default_reporter.py b/tests/autograder/core/reporter/test_default_reporter.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/core/test_grader.py b/tests/autograder/core/test_grader.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/autograder/test_facade.py b/tests/autograder/test_facade.py
deleted file mode 100644
index 79e428b..0000000
--- a/tests/autograder/test_facade.py
+++ /dev/null
@@ -1,236 +0,0 @@
-import unittest
-from unittest.mock import patch, Mock
-
-from autograder.pipeline.autograder_facade import Autograder
-from autograder.models.autograder_request import AutograderRequest
-from autograder.models.assignment_config import AssignmentConfig
-from autograder.models.dataclass.autograder_response import AutograderResponse
-from autograder.models.dataclass.result import Result
-
-
-class TestAutograderFacade(unittest.TestCase):
-
-    def setUp(self):
-        # Common test data
-        self.mock_submission = {"file.py": "print('hello')"}
-        self.mock_criteria = {"base": {"subjects": {"test": {"tests": ["passing_test"]}}}}
-        self.mock_feedback_prefs = {"general": {}}
-
-        self.mock_assignment_config = AssignmentConfig(
-            criteria=self.mock_criteria,
-            feedback=self.mock_feedback_prefs,
-            setup={},
-            template="web dev"
-        )
-
-        # A standard successful result from the Grader
-        self.mock_grader_result = Result(
-            final_score=85.0,
-            author="test_student",
-            submission_file=self.mock_submission,
-            base_results=[], bonus_results=[], penalty_results=[]
-        )
-
-    @patch('pipeline.autograder_facade.CriteriaTree')
-    @patch('pipeline.autograder_facade.TemplateLibrary')
-    @patch('pipeline.autograder_facade.Grader')
-    @patch('pipeline.autograder_facade.Reporter')
-    def test_grade_success_default_feedback(self, mock_reporter, mock_grader, mock_template_library,
-                                            mock_criteria_tree):
-        """A successful grading run that returns generated default feedback."""
-        # Arrange
-        # Create a fake template object with the attributes the facade expects
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        fake_reporter = Mock()
-        fake_reporter.generate_feedback.return_value = "Great job!"
-        mock_reporter.create_default_reporter.return_value = fake_reporter
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=True,
-            feedback_mode="default"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertIsInstance(response, AutograderResponse)
-        self.assertEqual(response.status, "Success")
-        self.assertEqual(response.final_score, 85.0)
-        self.assertEqual(response.feedback, "Great job!")
-
-        mock_template_library.get_template.assert_called_once_with("web dev")
-        mock_criteria_tree.build_non_executed_tree.assert_called_once()
-        mock_grader.return_value.run.assert_called_once()
-        mock_reporter.create_default_reporter.assert_called_once()
-
-    @patch('pipeline.autograder_facade.TemplateLibrary')
-    def test_grade_failure_invalid_template(self, mock_template_library):
-        """If TemplateLibrary returns None, the facade should fail with an informative message."""
-        # Arrange
-        mock_template_library.get_template.return_value = None
-
-        invalid_config = AssignmentConfig(
-            criteria = self.mock_criteria,
-            feedback = self.mock_feedback_prefs,
-            setup = {},
-            template="invalid template"
-        )
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=invalid_config,
-            student_name="student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("Unsupported template: invalid template", response.feedback)
-
-    @patch('pipeline.autograder_facade.CriteriaTree')
-    @patch('pipeline.autograder_facade.TemplateLibrary')
-    @patch('pipeline.autograder_facade.Grader')
-    def test_grade_failure_during_grading(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """If the Grader raises an exception the facade should return a failure response containing the error."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.side_effect = Exception("Something went wrong in the grader")
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("Something went wrong in the grader", response.feedback)
-
-    @patch('pipeline.autograder_facade.CriteriaTree')
-    @patch('pipeline.autograder_facade.TemplateLibrary')
-    @patch('pipeline.autograder_facade.Grader')
-    def test_grade_failure_ai_missing_credentials(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """AI feedback mode without the required keys should fail with an explanatory message."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=True,
-            feedback_mode="ai",
-            openai_key=None  # missing keys
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("OpenAI key, Redis URL, and Redis token are required", response.feedback)
-
-    @patch('pipeline.autograder_facade.PreFlight')
-    def test_preflight_failure_stops_processing(self, mock_preflight):
-        """If pre-flight returns impediments, grading should stop and return those messages."""
-        # Arrange
-        mock_preflight.run.return_value = [{'message': 'setup failed due to X'}]
-
-        config_with_setup = AssignmentConfig(
-             criteria=self.mock_criteria,
-             feedback=self.mock_feedback_prefs,
-             setup={'cmds': []},
-             template="web dev"
-        )
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=config_with_setup,
-            student_name="student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn('setup failed due to X', response.feedback)
-
-    @patch('pipeline.autograder_facade.CriteriaTree')
-    @patch('pipeline.autograder_facade.TemplateLibrary')
-    @patch('pipeline.autograder_facade.Grader')
-    def test_no_feedback_requested_returns_score_only(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """When include_feedback is False, the facade should return the score and an empty feedback string."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=False
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "Success")
-        self.assertEqual(response.final_score, 85.0)
-        self.assertEqual(response.feedback, "")
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/unit/builder/__init__.py b/tests/unit/builder/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/builder/test_tree.py b/tests/unit/builder/test_tree.py
deleted file mode 100644
index 51b74ff..0000000
--- a/tests/unit/builder/test_tree.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import unittest
-# Assuming your tree builder and models are in this path
-from autograder.services.criteria_tree_service import CriteriaTree, Criteria, Subject, Test, TestCall
-
-class TestCriteriaTree(unittest.TestCase):
-
-    def test_empty_config(self):
-        """
-        Tests that building a tree from an empty config results in an empty Criteria object.
-        """
-        config = {}
-        criteria = CriteriaTree.build(config)
-        self.assertIsInstance(criteria, Criteria)
-        self.assertEqual(len(criteria.base.subjects), 0)
-
-    def test_invalid_subject(self):
-        """
-        Tests that a ValueError is raised if a subject has both 'tests' and 'subjects'.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "invalid_subject": {
-                        "tests": [{"file": "index.html", "name": "some_test"}],
-                        "subjects": {"sub_subject": {}}
-                    }
-                }
-            }
-        }
-        with self.assertRaises(ValueError):
-            CriteriaTree.build(config)
-
-    def test_weight_balancing(self):
-        """
-        Tests that the weights of sibling subjects are correctly balanced to sum to 100.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "html": {"weight": 60, "tests": []},
-                    "css": {"weight": 40, "tests": []}
-                }
-            },
-            "bonus": {
-                "weight": 50,
-                "subjects": {
-                    # These weights (10 + 10 = 20) will be scaled to sum to 100
-                    "accessibility": {"weight": 10, "tests": []},
-                    "performance": {"weight": 10, "tests": []}
-                }
-            }
-        }
-        criteria = CriteriaTree.build(config)
-
-        # Check base subjects (already sum to 100)
-        self.assertAlmostEqual(criteria.base.subjects["html"].weight, 60)
-        self.assertAlmostEqual(criteria.base.subjects["css"].weight, 40)
-
-        # Check bonus subjects (should be scaled: 10/20 -> 50, 10/20 -> 50)
-        self.assertAlmostEqual(criteria.bonus.subjects["accessibility"].weight, 50)
-        self.assertAlmostEqual(criteria.bonus.subjects["performance"].weight, 50)
-        self.assertEqual(criteria.bonus.max_score, 50)
-
-    def test_structure_and_defaults_with_new_format(self):
-        """
-        Tests the overall structure with the new explicit test format.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "html": {
-                        "tests": [
-                            # Test with no calls
-                            {"file": "index.html", "name": "test1"},
-                            # Test with calls
-                            {
-                                "file": "index.html",
-                                "name": "test2",
-                                "calls": [["arg1", 1], ["arg2"]]
-                            },
-                            # Simple string test (should get a default file)
-                            "test3"
-                        ]
-                    }
-                }
-            },
-            "penalty": {"weight": 75}
-        }
-        criteria = CriteriaTree.build(config)
-
-        # Test category weights
-        self.assertEqual(criteria.penalty.max_score, 75)
-        self.assertEqual(criteria.bonus.max_score, 0)  # Default
-
-        # Test subject structure
-        self.assertIn("html", criteria.base.subjects)
-        html_subject = criteria.base.subjects["html"]
-        self.assertIsInstance(html_subject, Subject)
-        #self.assertEqual(html_subject.weight, 100)  # Default weight when it's the only subject
-
-        # Test tests structure
-        self.assertEqual(len(html_subject.tests), 3)
-
-        # Find and verify test1
-        test1 = next(t for t in html_subject.tests if t.name == "test1")
-        self.assertEqual(test1.file, "index.html")
-        self.assertEqual(len(test1.calls), 1)
-        self.assertEqual(test1.calls[0].args, [])
-
-        # Find and verify test2
-        test2 = next(t for t in html_subject.tests if t.name == "test2")
-        self.assertEqual(test2.file, "index.html")
-        self.assertEqual(len(test2.calls), 2)
-        self.assertEqual(test2.calls[0].args, ["arg1", 1])
-        self.assertEqual(test2.calls[1].args, ["arg2"])
-
-        # Find and verify test3 (simple string)
-        test3 = next(t for t in html_subject.tests if t.name == "test3")
-        self.assertEqual(test3.file, "index.html") # Check default file assignment
-        self.assertEqual(len(test3.calls), 1)
-        self.assertEqual(test3.calls[0].args, [])
-
-    def test_complex_weight_balancing(self):
-        """
-        Tests weight balancing with a more complex, nested subject structure.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "frontend": {
-                        "weight": 75,
-                        "subjects": {
-                            "html": {"weight": 50, "tests": []},
-                            "css": {"weight": 50, "tests": []}
-                        }
-                    },
-                    "backend": {
-                        "weight": 25,
-                        "subjects": {
-                            # These weights (10 + 30 = 40) will be scaled to sum to 100
-                            "database": {"weight": 10, "tests": []},
-                            "api": {"weight": 30, "tests": []}
-                        }
-                    }
-                }
-            }
-        }
-        criteria = CriteriaTree.build(config)
-
-        # Top-level subjects should not be re-balanced as they sum to 100
-        self.assertAlmostEqual(criteria.base.subjects["frontend"].weight, 75)
-        self.assertAlmostEqual(criteria.base.subjects["backend"].weight, 25)
-
-        # Nested subjects in 'frontend' are already balanced
-        frontend = criteria.base.subjects["frontend"]
-        self.assertAlmostEqual(frontend.subjects["html"].weight, 50)
-        self.assertAlmostEqual(frontend.subjects["css"].weight, 50)
-
-        # Nested subjects in 'backend' should be re-balanced
-        backend = criteria.base.subjects["backend"]
-        self.assertAlmostEqual(backend.subjects["database"].weight, 25) # 10/40 -> 25
-        self.assertAlmostEqual(backend.subjects["api"].weight, 75)      # 30/40 -> 75
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/core/reporter/__init__.py b/tests/unit/core/reporter/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/core/reporter/test_ai_reporter.py b/tests/unit/core/reporter/test_ai_reporter.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/core/reporter/test_default_reporter.py b/tests/unit/core/reporter/test_default_reporter.py
deleted file mode 100644
index 36f09e8..0000000
--- a/tests/unit/core/reporter/test_default_reporter.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import unittest
-from autograder.services.report import DefaultReporter
-from autograder.models.dataclass.result import Result
-from autograder.models.dataclass.test_result import TestResult
-from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
-
-
-class TestDefaultReporter(unittest.TestCase):
-
-    def setUp(self):
-        """Set up a mock Result and FeedbackPreferences object for testing."""
-
-        # Create a variety of test results for different scenarios
-        base_results = [
-            TestResult("passing_base_test", 100, "Base test passed.", "html"),
-            TestResult("failing_base_test", 0, "Base test failed.", "css", {"file": "style.css"})
-        ]
-        bonus_results = [
-            TestResult("passing_bonus_test", 100, "Bonus achieved!", "javascript"),
-            TestResult("failing_bonus_test", 50, "Bonus partially met.", "accessibility")
-        ]
-        penalty_results = [
-            TestResult("passing_penalty_test", 100, "Penalty avoided.", "html_validation"),
-            TestResult("failing_penalty_test", 0, "Penalty applied for malpractice.", "js_malpractice")
-        ]
-
-        self.mock_result = Result(
-            final_score=75.5,
-            author="Jane Doe",
-            submission_file={"index.html": ""},
-            base_results=base_results,
-            bonus_results=bonus_results,
-            penalty_results=penalty_results
-        )
-
-        # Create custom feedback preferences
-        feedback_config = {
-            "general": {
-                "report_title": "Test Report",
-                "show_passed_tests": True,
-                "add_report_summary": True,
-                "online_content": [{
-                    "url": "http://example.com/css-guide",
-                    "description": "CSS Best Practices",
-                    "linked_tests": ["failing_base_test"]
-                }]
-            },
-            "default": {
-                "category_headers": {
-                    "base": "Core Requirements",
-                    "bonus": "Extra Credit",
-                    "penalty": "Areas for Improvement"
-                }
-            }
-        }
-        self.mock_feedback_prefs = FeedbackPreferences.from_dict(feedback_config)
-
-    def test_report_header(self):
-        """Tests if the report header is generated correctly."""
-        reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        self.assertIn("# Test Report", feedback)
-        self.assertIn("### Olá, **Jane Doe**! 👋", feedback)
-        self.assertIn("> **Nota Final:** **`75.50 / 100`**", feedback)
-
-    def test_report_sections_and_content(self):
-        """
-        Tests that each category section is correctly rendered based on feedback preferences.
-        """
-        reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        # Check for custom headers
-        self.assertIn("## Core Requirements", feedback)
-        self.assertIn("## Extra Credit", feedback)
-        self.assertIn("## Areas for Improvement", feedback)
-
-        # Base section should only show the failing test
-        self.assertIn("failing_base_test", feedback)
-        self.assertNotIn("passing_base_test", feedback)
-
-        # Bonus section should only show the passing test (since show_passed_tests is True)
-        self.assertIn("passing_bonus_test", feedback)
-        self.assertNotIn("failing_bonus_test", feedback)
-
-        # Penalty section should only show the failing (applied) penalty
-        self.assertIn("failing_penalty_test", feedback)
-        self.assertNotIn("passing_penalty_test", feedback)
-
-    def test_parameter_formatting(self):
-        """Tests if test parameters are formatted correctly in the report."""
-        reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        # Check for the formatted parameter string in the failing base test
-        self.assertIn("(Parâmetros: `file: 'style.css'`)", feedback)
-
-    def test_summary_table_generation(self):
-        """Tests the generation of the summary table with correct entries."""
-        reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        self.assertIn("### 📝 Resumo dos Pontos de Atenção", feedback)
-        # Should contain the failing base test and the failing penalty test
-        self.assertIn("| Revisar | `css` | `failing_base_test` (Parâmetros: `file: 'style.css'`) |", feedback)
-        self.assertIn("| Corrigir (Penalidade) | `js_malpractice` | `failing_penalty_test` |", feedback)
-        # Should NOT contain any passing tests
-        self.assertNotIn("passing_base_test", feedback.split("### 📝 Resumo dos Pontos de Atenção")[1])
-
-    def test_online_content_linking(self):
-        """Tests if suggested learning resources are correctly linked in the report."""
-        reporter = DefaultReporter(self.mock_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        # The failing_base_test is linked to a resource, so it should be present
-        expected_link = "> 📚 **Recurso Sugerido:** [CSS Best Practices](http://example.com/css-guide)"
-        self.assertIn(expected_link, feedback)
-
-    def test_no_issues_report(self):
-        """Tests the report format when all tests pass and no penalties are applied."""
-        # Create a result object with only passing scores
-        all_passing_result = Result(
-            final_score=100.0, author="John Doe", submission_file={},
-            base_results=[TestResult("p1", 100, "p", "s1")],
-            bonus_results=[TestResult("p2", 100, "p", "s2")],
-            penalty_results=[]
-        )
-        reporter = DefaultReporter(all_passing_result, self.mock_feedback_prefs)
-        feedback = reporter.generate_feedback()
-
-        # No category sections for base/penalty should be generated
-        self.assertNotIn("## Core Requirements", feedback)
-        self.assertNotIn("## Areas for Improvement", feedback)
-
-        # Summary should show the success message
-        self.assertIn("Excelente trabalho! Nenhum ponto crítico de atenção foi encontrado.", feedback)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/unit/core/test_grader.py b/tests/unit/core/test_grader.py
deleted file mode 100644
index ac971f0..0000000
--- a/tests/unit/core/test_grader.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import unittest
-
-# Assuming these classes are in your project structure
-from autograder.services.criteria_tree_service import CriteriaTree, Criteria, Subject, Test, TestCall
-from autograder.models.dataclass.result import Result
-from autograder.models.dataclass.test_result import TestResult
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.services.grader_service import Grader
-
-# ===============================================================
-# Mock Template Library based on the new TestFunction model
-# ===============================================================
-
-class PassingTest(TestFunction):
-    @property
-    def name(self): return "passing_test"
-    @property
-    def description(self): return "A mock test that always passes."
-    @property
-    def parameter_description(self): return {}
-    def execute(self, *args, **kwargs) -> TestResult:
-        return TestResult(self.name, 100, "This test always passes.")
-
-class FailingTest(TestFunction):
-    @property
-    def name(self): return "failing_test"
-    @property
-    def description(self): return "A mock test that always fails."
-    @property
-    def parameter_description(self): return {}
-    def execute(self, *args, **kwargs) -> TestResult:
-        return TestResult(self.name, 0, "This test always fails.")
-
-class PartialTest(TestFunction):
-    @property
-    def name(self): return "partial_test"
-    @property
-    def description(self): return "A mock test that gives partial credit."
-    @property
-    def parameter_description(self): return {}
-    def execute(self, *args, **kwargs) -> TestResult:
-        return TestResult(self.name, 50, "This test gives partial credit.")
-
-class MockTemplate(Template):
-    @property
-    def name(self):
-        return "Mock Library"
-
-    def __init__(self):
-        self.tests = {
-            "passing_test": PassingTest(),
-            "failing_test": FailingTest(),
-            "partial_test": PartialTest(),
-        }
-
-    def get_test(self, name: str) -> TestFunction:
-        return self.tests.get(name)
-
-# ===============================================================
-# Updated Unit Test Class
-# ===============================================================
-
-class TestGrader(unittest.TestCase):
-
-    def setUp(self):
-        """
-        Set up a common mock library and submission data for the tests.
-        """
-        self.mock_library = MockTemplate()
-        self.submission_files = {"index.html": "<html></html>"}
-        self.author_name = "Test Student"
-
-    def test_basic_score_calculation(self):
-        """
-        Tests the final score calculation with a mix of passing and failing tests.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "html": {
-                        "weight": 100,
-                        "tests": [
-                            {"file": "index.html", "name": "passing_test"},
-                            {"file": "index.html", "name": "failing_test"}
-                        ]
-                    }
-                }
-            }
-        }
-        criteria = CriteriaTree.build(config)
-        grader = Grader(criteria, self.mock_library)
-        result = grader.run(self.submission_files, self.author_name)
-        # Average of tests: (100 + 0) / 2 = 50. Subject score = 50.
-        self.assertAlmostEqual(result.final_score, 50)
-
-    def test_bonus_points_application(self):
-        """
-        Tests that bonus points are correctly applied to the final score.
-        """
-        config = {
-            "base": {
-                "subjects": {"html": {"tests": [{"file": "index.html", "name": "partial_test"}]}}
-            },
-            "bonus": {
-                "weight": 20, # This is the max_score for the bonus category
-                "subjects": {"extra": {"tests": [{"file": "index.html", "name": "passing_test"}]}}
-            }
-        }
-        criteria = CriteriaTree.build(config)
-        grader = Grader(criteria, self.mock_library)
-        result = grader.run(self.submission_files, self.author_name)
-
-        # Base score = 50. Bonus score = 100.
-        # Bonus points to add = (100 / 100) * 20 = 20.
-        # Final score = 50 + 20 = 70.
-        self.assertAlmostEqual(result.final_score, 70)
-
-    def test_penalty_points_deduction(self):
-        """
-        Tests that penalty points are correctly deducted from the final score.
-        A "failing" penalty test (score=0) means the penalty IS applied.
-        """
-        config = {
-            "base": {
-                "subjects": {"html": {"tests": [{"file": "index.html", "name": "passing_test"}]}}
-            },
-            "penalty": {
-                "weight": 30, # This is the max_score for the penalty category
-                "subjects": {"malpractice": {"tests": [{"file": "index.html", "name": "failing_test"}]}}
-            }
-        }
-        criteria = CriteriaTree.build(config)
-        grader = Grader(criteria, self.mock_library)
-        result = grader.run(self.submission_files, self.author_name)
-
-        # Base score = 100.
-        # Penalty test failed (score=0), so 100% of the penalty is incurred.
-        # Penalty points to subtract = (100 / 100) * 30 = 30.
-        # Final score = 100 - 30 = 70.
-        self.assertAlmostEqual(result.final_score, 70)
-
-    def test_complex_grading_with_nested_subjects(self):
-        """
-        Tests the grader with a more complex, nested criteria tree with varying weights.
-        """
-        config = {
-            "base": {
-                "subjects": {
-                    "frontend": {
-                        "weight": 80,
-                        "subjects": {
-                            "html": {"weight": 50, "tests": [{"file": "index.html", "name": "passing_test"}]}, # Score: 100
-                            "css": {"weight": 50, "tests": [{"file": "index.html", "name": "failing_test"}]}  # Score: 0
-                        }
-                    },
-                    "backend": {
-                        "weight": 20,
-                        "tests": [{"file": "index.html", "name": "partial_test"}] # Score: 50
-                    }
-                }
-            }
-        }
-        criteria = CriteriaTree.build(config)
-        grader = Grader(criteria, self.mock_library)
-        result = grader.run(self.submission_files, self.author_name)
-
-        # Frontend score (weighted avg of children) = (100 * 0.5) + (0 * 0.5) = 50
-        # Backend score = 50
-        # Total base score (weighted avg of children) = (50 * 0.8) + (50 * 0.2) = 40 + 10 = 50
-        self.assertAlmostEqual(result.final_score, 50)
-        self.assertIsInstance(result, Result)
-        self.assertEqual(len(grader.base_results), 3)
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
diff --git a/tests/unit/templates/__init__.py b/tests/unit/templates/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/test_facade.py b/tests/unit/test_facade.py
deleted file mode 100644
index 4c39c67..0000000
--- a/tests/unit/test_facade.py
+++ /dev/null
@@ -1,236 +0,0 @@
-import unittest
-from unittest.mock import patch, Mock
-
-from autograder.autograder_facade import Autograder
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.models.dataclass.autograder_response import AutograderResponse
-from autograder.models.dataclass.result import Result
-
-
-class TestAutograderFacade(unittest.TestCase):
-
-    def setUp(self):
-        # Common test data
-        self.mock_submission = {"file.py": "print('hello')"}
-        self.mock_criteria = {"base": {"subjects": {"test": {"tests": ["passing_test"]}}}}
-        self.mock_feedback_prefs = {"general": {}}
-
-        self.mock_assignment_config = AssignmentConfig(
-            criteria=self.mock_criteria,
-            feedback=self.mock_feedback_prefs,
-            setup={},
-            template="web dev"
-        )
-
-        # A standard successful result from the Grader
-        self.mock_grader_result = Result(
-            final_score=85.0,
-            author="test_student",
-            submission_file=self.mock_submission,
-            base_results=[], bonus_results=[], penalty_results=[]
-        )
-
-    @patch('autograder.autograder_facade.CriteriaTree')
-    @patch('autograder.autograder_facade.TemplateLibrary')
-    @patch('autograder.autograder_facade.Grader')
-    @patch('autograder.autograder_facade.Reporter')
-    def test_grade_success_default_feedback(self, mock_reporter, mock_grader, mock_template_library,
-                                            mock_criteria_tree):
-        """A successful grading run that returns generated default feedback."""
-        # Arrange
-        # Create a fake template object with the attributes the facade expects
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        fake_reporter = Mock()
-        fake_reporter.generate_feedback.return_value = "Great job!"
-        mock_reporter.create_default_reporter.return_value = fake_reporter
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=True,
-            feedback_mode="default"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertIsInstance(response, AutograderResponse)
-        self.assertEqual(response.status, "Success")
-        self.assertEqual(response.final_score, 85.0)
-        self.assertEqual(response.feedback, "Great job!")
-
-        mock_template_library.get_template.assert_called_once_with("web dev")
-        mock_criteria_tree.build_non_executed_tree.assert_called_once()
-        mock_grader.return_value.run.assert_called_once()
-        mock_reporter.create_default_reporter.assert_called_once()
-
-    @patch('autograder.autograder_facade.TemplateLibrary')
-    def test_grade_failure_invalid_template(self, mock_template_library):
-        """If TemplateLibrary returns None, the facade should fail with an informative message."""
-        # Arrange
-        mock_template_library.get_template.return_value = None
-
-        invalid_config = AssignmentConfig(
-            criteria = self.mock_criteria,
-            feedback = self.mock_feedback_prefs,
-            setup = {},
-            template="invalid template"
-        )
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=invalid_config,
-            student_name="student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("Unsupported template: invalid template", response.feedback)
-
-    @patch('autograder.autograder_facade.CriteriaTree')
-    @patch('autograder.autograder_facade.TemplateLibrary')
-    @patch('autograder.autograder_facade.Grader')
-    def test_grade_failure_during_grading(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """If the Grader raises an exception the facade should return a failure response containing the error."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.side_effect = Exception("Something went wrong in the grader")
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("Something went wrong in the grader", response.feedback)
-
-    @patch('autograder.autograder_facade.CriteriaTree')
-    @patch('autograder.autograder_facade.TemplateLibrary')
-    @patch('autograder.autograder_facade.Grader')
-    def test_grade_failure_ai_missing_credentials(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """AI feedback mode without the required keys should fail with an explanatory message."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=True,
-            feedback_mode="ai",
-            openai_key=None  # missing keys
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn("OpenAI key, Redis URL, and Redis token are required", response.feedback)
-
-    @patch('autograder.autograder_facade.PreFlight')
-    def test_preflight_failure_stops_processing(self, mock_preflight):
-        """If pre-flight returns impediments, grading should stop and return those messages."""
-        # Arrange
-        mock_preflight.run.return_value = [{'message': 'setup failed due to X'}]
-
-        config_with_setup = AssignmentConfig(
-             criteria=self.mock_criteria,
-             feedback=self.mock_feedback_prefs,
-             setup={'cmds': []},
-             template="web dev"
-        )
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=config_with_setup,
-            student_name="student"
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "fail")
-        self.assertEqual(response.final_score, 0.0)
-        self.assertIn('setup failed due to X', response.feedback)
-
-    @patch('autograder.autograder_facade.CriteriaTree')
-    @patch('autograder.autograder_facade.TemplateLibrary')
-    @patch('autograder.autograder_facade.Grader')
-    def test_no_feedback_requested_returns_score_only(self, mock_grader, mock_template_library, mock_criteria_tree):
-        """When include_feedback is False, the facade should return the score and an empty feedback string."""
-        # Arrange
-        fake_template = Mock()
-        fake_template.requires_pre_executed_tree = False
-        fake_template.template_name = "web dev"
-        fake_template.stop = Mock()
-        mock_template_library.get_template.return_value = fake_template
-
-        fake_tree = Mock()
-        fake_tree.print_pre_executed_tree = Mock()
-        mock_criteria_tree.build_non_executed_tree.return_value = fake_tree
-
-        mock_grader.return_value.run.return_value = self.mock_grader_result
-
-        autograder_request = AutograderRequest(
-            submission_files=self.mock_submission,
-            assignment_config=self.mock_assignment_config,
-            student_name="test_student",
-            include_feedback=False
-        )
-
-        # Act
-        response = Autograder.grade(autograder_request)
-
-        # Assert
-        self.assertEqual(response.status, "Success")
-        self.assertEqual(response.final_score, 85.0)
-        self.assertEqual(response.feedback, "")
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/unit/test_preflight_step.py b/tests/unit/test_preflight_step.py
deleted file mode 100644
index 16d6b7c..0000000
--- a/tests/unit/test_preflight_step.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import unittest
-from autograder.steps.pre_flight_step import PreFlightStep
-from autograder.models.dataclass.step_result import StepStatus
-from autograder.models.dataclass.preflight_error import PreflightCheckType
-
-
-class TestPreFlightStep(unittest.TestCase):
-
-    def test_no_required_files_passes(self):
-        """Test that step passes when no required files are specified"""
-        setup_config = {}
-        step = PreFlightStep(setup_config)
-
-        result = step.execute(['file1.py', 'file2.py'])
-
-        self.assertEqual(result.status, StepStatus.SUCCESS)
-        self.assertIsNone(result.error)
-        self.assertTrue(result.is_successful)
-
-    def test_required_files_all_present_passes(self):
-        """Test that step passes when all required files are present"""
-        setup_config = {
-            'required_files': ['file1.py', 'file2.py']
-        }
-        step = PreFlightStep(setup_config)
-
-        result = step.execute(['file1.py', 'file2.py', 'file3.py'])
-
-        self.assertEqual(result.status, StepStatus.SUCCESS)
-        self.assertIsNone(result.error)
-        self.assertTrue(result.is_successful)
-
-    def test_required_files_missing_fails(self):
-        """Test that step fails when required files are missing"""
-        setup_config = {
-            'required_files': ['file1.py', 'file2.py']
-        }
-        step = PreFlightStep(setup_config)
-
-        result = step.execute(['file1.py'])  # file2.py is missing
-
-        self.assertEqual(result.status, StepStatus.FAIL)
-        self.assertIsNotNone(result.error)
-        self.assertFalse(result.is_successful)
-        self.assertIn('file2.py', result.error)
-        self.assertEqual(result.failed_at_step, 'PreFlightStep')
-
-    def test_multiple_missing_files_all_reported(self):
-        """Test that all missing files are reported in the error"""
-        setup_config = {
-            'required_files': ['file1.py', 'file2.py', 'file3.py']
-        }
-        step = PreFlightStep(setup_config)
-
-        result = step.execute(['file1.py'])  # file2.py and file3.py are missing
-
-        self.assertEqual(result.status, StepStatus.FAIL)
-        self.assertIn('file2.py', result.error)
-        self.assertIn('file3.py', result.error)
-
-    def test_setup_commands_not_run_when_file_check_fails(self):
-        """Test that setup commands are not checked if file check fails"""
-        setup_config = {
-            'required_files': ['missing.py'],
-            'setup_commands': ['npm install']
-        }
-        step = PreFlightStep(setup_config)
-
-        result = step.execute(['other.py'])
-
-        # Should fail on file check, not even attempt setup commands
-        self.assertEqual(result.status, StepStatus.FAIL)
-        self.assertIn('missing.py', result.error)
-
-        # Verify only file check errors are present
-        file_check_errors = [e for e in step._pre_flight_service.fatal_errors
-                           if e.type == PreflightCheckType.FILE_CHECK]
-        setup_errors = [e for e in step._pre_flight_service.fatal_errors
-                       if e.type == PreflightCheckType.SETUP_COMMAND]
-
-        self.assertGreater(len(file_check_errors), 0)
-        self.assertEqual(len(setup_errors), 0)
-
-
-if __name__ == '__main__':
-    unittest.main()
-

From b1715c8e338964203a0b6db9a50ad6ef70c9389c Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sun, 28 Dec 2025 23:13:31 -0300
Subject: [PATCH 07/49] feat: add criteria schema and documentation for grading
 pipeline

---
 critera_schema.json                           | 570 ++++++++++++++++++
 .../configuration/criteria_schema_v2.md       | 306 ++++++++++
 docs/system/pipeline_tree_logic.md            | 324 ++++++++++
 3 files changed, 1200 insertions(+)
 create mode 100644 critera_schema.json
 create mode 100644 docs/system/configuration/criteria_schema_v2.md
 create mode 100644 docs/system/pipeline_tree_logic.md

diff --git a/critera_schema.json b/critera_schema.json
new file mode 100644
index 0000000..31753a3
--- /dev/null
+++ b/critera_schema.json
@@ -0,0 +1,570 @@
+{
+  "test_library": "web_dev",
+  "base": {
+    "weight": 100,
+    "subjects": [
+      {
+        "subject_name": "html",
+        "weight": 60,
+        "subjects": [
+          {
+            "subject_name": "structure",
+            "weight": 40,
+            "tests": [
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "body"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "header"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "nav"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "main"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "article"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 4
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "img"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 5
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "footer"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "div"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "form"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "input"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {
+                    "name": "tag",
+                    "value": "button"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_attribute",
+                "parameters": [
+                  {
+                    "name": "attribute",
+                    "value": "class"
+                  },
+                  {
+                    "name": "required_count",
+                    "value": 2
+                  }
+                ]
+              }
+            ]
+          },
+          {
+            "subject_name": "link",
+            "weight": 20,
+            "tests": [
+              {
+                "file": "index.html",
+                "name": "check_css_linked"
+              },
+              {
+                "file": "index.html",
+                "name": "check_internal_links_to_article",
+                "parameters": [
+                  {
+                    "name": "required_count",
+                    "value": 4
+                  }
+                ]
+              }
+            ]
+          }
+        ]
+      },
+      {
+        "subject_name": "css",
+        "weight": 40,
+        "subjects": [
+          {
+            "subject_name": "responsivity",
+            "weight": 50,
+            "tests": [
+              {
+                "file": "css/styles.css",
+                "name": "uses_relative_units"
+              },
+              {
+                "file": "css/styles.css",
+                "name": "check_media_queries"
+              },
+              {
+                "file": "css/styles.css",
+                "name": "check_flexbox_usage"
+              }
+            ]
+          },
+          {
+            "subject_name": "style",
+            "weight": 50,
+            "tests": [
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "font-size"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "font-family"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "text-align"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "display"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "position"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "margin"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              },
+              {
+                "file": "css/styles.css",
+                "name": "has_style",
+                "parameters": [
+                  {
+                    "name": "style",
+                    "value": "padding"
+                  },
+                  {
+                    "name": "count",
+                    "value": 1
+                  }
+                ]
+              }
+            ]
+          }
+        ]
+      }
+    ]
+  },
+  "bonus": {
+    "weight": 40,
+    "subjects": [
+      {
+        "subject_name": "accessibility",
+        "weight": 20,
+        "tests": [
+          {
+            "file": "index.html",
+            "name": "check_all_images_have_alt"
+          }
+        ]
+      },
+      {
+        "subject_name": "head_detail",
+        "weight": 80,
+        "tests": [
+          {
+            "file": "index.html",
+            "name": "check_head_details",
+            "parameters": [
+              {
+                "name": "detail_tag",
+                "value": "title"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_head_details",
+            "parameters": [
+              {
+                "name": "detail_tag",
+                "value": "meta"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_attribute_and_value",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "meta"
+              },
+              {
+                "name": "attribute",
+                "value": "charset"
+              },
+              {
+                "name": "value",
+                "value": "UTF-8"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_attribute_and_value",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "meta"
+              },
+              {
+                "name": "attribute",
+                "value": "name"
+              },
+              {
+                "name": "value",
+                "value": "viewport"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_attribute_and_value",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "meta"
+              },
+              {
+                "name": "attribute",
+                "value": "name"
+              },
+              {
+                "name": "value",
+                "value": "description"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_attribute_and_value",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "meta"
+              },
+              {
+                "name": "attribute",
+                "value": "name"
+              },
+              {
+                "name": "value",
+                "value": "author"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_attribute_and_value",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "meta"
+              },
+              {
+                "name": "attribute",
+                "value": "name"
+              },
+              {
+                "name": "value",
+                "value": "keywords"
+              }
+            ]
+          }
+        ]
+      }
+    ]
+  },
+  "penalty": {
+    "weight": 50,
+    "subjects": [
+      {
+        "subject_name": "html",
+        "weight": 50,
+        "tests": [
+          {
+            "file": "index.html",
+            "name": "check_bootstrap_usage"
+          },
+          {
+            "file": "css/styles.css",
+            "name": "check_id_selector_over_usage",
+            "parameters": [
+              {
+                "name": "max_allowed",
+                "value": 2
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "has_forbidden_tag",
+            "parameters": [
+              {
+                "name": "tag",
+                "value": "script"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_html_direct_children"
+          },
+          {
+            "file": "index.html",
+            "name": "check_tag_not_inside",
+            "parameters": [
+              {
+                "name": "child_tag",
+                "value": "header"
+              },
+              {
+                "name": "parent_tag",
+                "value": "main"
+              }
+            ]
+          },
+          {
+            "file": "index.html",
+            "name": "check_tag_not_inside",
+            "parameters": [
+              {
+                "name": "child_tag",
+                "value": "footer"
+              },
+              {
+                "name": "parent_tag",
+                "value": "main"
+              }
+            ]
+          }
+        ]
+      },
+      {
+        "subject_name": "project_structure",
+        "weight": 50,
+        "tests": [
+          {
+            "file": "all",
+            "name": "check_dir_exists",
+            "parameters": [
+              {
+                "name": "dir_path",
+                "value": "css"
+              }
+            ]
+          },
+          {
+            "file": "all",
+            "name": "check_dir_exists",
+            "parameters": [
+              {
+                "name": "dir_path",
+                "value": "imgs"
+              }
+            ]
+          },
+          {
+            "file": "all",
+            "name": "check_project_structure",
+            "parameters": [
+              {
+                "name": "expected_structure",
+                "value": "css/styles.css"
+              }
+            ]
+          }
+        ]
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/docs/system/configuration/criteria_schema_v2.md b/docs/system/configuration/criteria_schema_v2.md
new file mode 100644
index 0000000..0608e0a
--- /dev/null
+++ b/docs/system/configuration/criteria_schema_v2.md
@@ -0,0 +1,306 @@
+# Criteria Schema Documentation
+
+## Overview
+
+The criteria schema defines the grading rubric for assignments. It uses a hierarchical structure with categories, subjects, and tests.
+
+## Schema Version: 2.0 (Current)
+
+### Key Changes from Version 1.0
+
+1. **Subjects as Arrays**: Subjects are now arrays with explicit `subject_name` field (instead of dictionaries with implicit names as keys)
+2. **Named Parameters**: Test parameters are now named objects `[{"name": "param", "value": "val"}]` (instead of positional arrays)
+3. **No Calls Array**: Each test object represents one execution (no `calls` array)
+4. **Template Library Field**: Root config includes optional `test_library` field
+
+## Schema Structure
+
+### Root Configuration
+
+```json
+{
+  "test_library": "web_dev",  // Optional: name of test template to use
+  "base": { /* CategoryConfig */ },     // Required: base grading criteria
+  "bonus": { /* CategoryConfig */ },    // Optional: bonus points
+  "penalty": { /* CategoryConfig */ }   // Optional: penalty points
+}
+```
+
+### Category Configuration
+
+A category can contain either **subjects** OR **tests** (not both).
+
+```json
+{
+  "weight": 100,                        // Weight of this category (0-100)
+  "subjects": [ /* SubjectConfig[] */ ] // Array of subjects
+  // OR
+  "tests": [ /* TestConfig[] */ ]      // Array of tests
+}
+```
+
+### Subject Configuration
+
+A subject can contain either **nested subjects** OR **tests** (not both).
+
+```json
+{
+  "subject_name": "html_structure",    // Required: name of the subject
+  "weight": 40,                         // Weight of this subject (0-100)
+  "subjects": [ /* SubjectConfig[] */ ] // Array of nested subjects
+  // OR
+  "tests": [ /* TestConfig[] */ ]      // Array of tests
+}
+```
+
+### Test Configuration
+
+```json
+{
+  "name": "has_tag",                   // Required: test function name
+  "file": "index.html",                // Optional: target file
+  "parameters": [                      // Optional: named parameters
+    {
+      "name": "tag",
+      "value": "div"
+    },
+    {
+      "name": "required_count",
+      "value": 5
+    }
+  ]
+}
+```
+
+## Complete Example
+
+```json
+{
+  "test_library": "web_dev",
+  "base": {
+    "weight": 100,
+    "subjects": [
+      {
+        "subject_name": "html",
+        "weight": 60,
+        "subjects": [
+          {
+            "subject_name": "structure",
+            "weight": 40,
+            "tests": [
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {"name": "tag", "value": "body"},
+                  {"name": "required_count", "value": 1}
+                ]
+              },
+              {
+                "file": "index.html",
+                "name": "has_tag",
+                "parameters": [
+                  {"name": "tag", "value": "header"},
+                  {"name": "required_count", "value": 1}
+                ]
+              }
+            ]
+          },
+          {
+            "subject_name": "links",
+            "weight": 20,
+            "tests": [
+              {
+                "file": "index.html",
+                "name": "check_css_linked"
+              }
+            ]
+          }
+        ]
+      },
+      {
+        "subject_name": "css",
+        "weight": 40,
+        "tests": [
+          {
+            "file": "style.css",
+            "name": "has_style",
+            "parameters": [
+              {"name": "property", "value": "margin"},
+              {"name": "count", "value": 1}
+            ]
+          }
+        ]
+      }
+    ]
+  },
+  "bonus": {
+    "weight": 20,
+    "subjects": [
+      {
+        "subject_name": "accessibility",
+        "weight": 100,
+        "tests": [
+          {
+            "file": "index.html",
+            "name": "check_all_images_have_alt"
+          }
+        ]
+      }
+    ]
+  },
+  "penalty": {
+    "weight": 30,
+    "subjects": [
+      {
+        "subject_name": "bad_practices",
+        "weight": 100,
+        "tests": [
+          {
+            "file": "index.html",
+            "name": "has_forbidden_tag",
+            "parameters": [
+              {"name": "tag", "value": "script"}
+            ]
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+## Validation Rules
+
+### Category Level
+- Must have either `subjects` OR `tests` (not both, not neither)
+- Weight must be between 0 and 100
+- If `subjects` is present, it must be a non-empty array
+
+### Subject Level
+- Must have `subject_name` field
+- Must have either `subjects` OR `tests` (not both, not neither)
+- Weight must be between 0 and 100
+- If `subjects` is present, it must be a non-empty array
+
+### Test Level
+- Must have `name` field (test function name)
+- `file` is optional (some tests don't target specific files)
+- `parameters` is optional (empty array or omitted means no parameters)
+- Each parameter must have `name` and `value` fields
+
+### Weight Balancing
+- Sibling subjects/tests have their weights automatically balanced to sum to 100
+- Example: If you have 3 subjects with weights [30, 40, 50], they'll be scaled to [25, 33.33, 41.67]
+
+## Parameter Handling
+
+Parameters are converted from named objects to positional arguments when calling test functions:
+
+```json
+"parameters": [
+  {"name": "tag", "value": "div"},
+  {"name": "required_count", "value": 5}
+]
+```
+
+Becomes: `test_function.execute("div", 5, files=submission_files)`
+
+The order of parameters in the array determines the order of positional arguments.
+
+## Special File Values
+
+- `"file": "index.html"` - Target specific file
+- `"file": "all"` - Pass all submission files to test
+- `"file": null` or omitted - No specific file target
+
+## Migration from Schema v1.0
+
+### Old Format (v1.0)
+```json
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "html_structure": {
+        "weight": 40,
+        "tests": [
+          {
+            "name": "has_tag",
+            "file": "index.html",
+            "calls": [
+              ["div", 5],
+              ["h1", 2]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+### New Format (v2.0)
+```json
+{
+  "test_library": "web_dev",
+  "base": {
+    "weight": 100,
+    "subjects": [
+      {
+        "subject_name": "html_structure",
+        "weight": 40,
+        "tests": [
+          {
+            "name": "has_tag",
+            "file": "index.html",
+            "parameters": [
+              {"name": "tag", "value": "div"},
+              {"name": "required_count", "value": 5}
+            ]
+          },
+          {
+            "name": "has_tag",
+            "file": "index.html",
+            "parameters": [
+              {"name": "tag", "value": "h1"},
+              {"name": "required_count", "value": 2}
+            ]
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+### Key Differences
+1. Each test execution is now a separate test object (no `calls` array)
+2. Subjects use array format with `subject_name` field
+3. Parameters are named objects instead of positional arrays
+4. Added optional `test_library` field at root
+
+## Best Practices
+
+1. **Clear Naming**: Use descriptive `subject_name` values
+2. **Logical Grouping**: Group related tests under subjects
+3. **Weight Distribution**: Assign weights based on importance
+4. **Parameter Names**: Use clear parameter names that match test function signatures
+5. **File Organization**: Specify file paths relative to submission root
+
+## Pydantic Models
+
+The schema is validated using Pydantic models:
+
+- `CriteriaConfig` - Root configuration
+- `CategoryConfig` - Category (base/bonus/penalty)
+- `SubjectConfig` - Subject node
+- `TestConfig` - Test configuration
+- `ParameterConfig` - Named parameter
+
+These models provide:
+- Automatic validation
+- Type checking
+- Helpful error messages
+- IDE autocomplete support
+
diff --git a/docs/system/pipeline_tree_logic.md b/docs/system/pipeline_tree_logic.md
new file mode 100644
index 0000000..c585ce3
--- /dev/null
+++ b/docs/system/pipeline_tree_logic.md
@@ -0,0 +1,324 @@
+# Pipeline Logic for Criteria Tree and Tree Building
+
+## Overview
+
+The pipeline implements conditional logic to optimize grading based on the number of submissions. This design eliminates unnecessary tree construction overhead when grading single submissions while maintaining efficient batch processing for multiple submissions.
+
+## Key Concepts
+
+### Why Two Paths?
+
+**Single Submission Path**: When grading only one submission, building a criteria tree and then traversing it is redundant. We can directly process the criteria configuration and build the result tree in one pass.
+
+**Multiple Submissions Path**: When grading multiple submissions, the criteria tree becomes valuable because:
+- The tree structure is built once and reused for all submissions
+- Reduces redundant parsing and validation
+- Improves overall performance through tree reuse
+
+## Pipeline Flow Diagram
+
+### Single Submission Path (Optimized)
+```
+┌─────────────────────┐
+│  Criteria Config    │
+│  (JSON/Dict)        │
+└──────────┬──────────┘
+           │
+           ▼
+┌─────────────────────┐
+│    GradeStep        │
+│ grade_from_config() │
+└──────────┬──────────┘
+           │
+           ▼
+┌─────────────────────┐
+│   ResultTree        │
+│  (Final Score)      │
+└─────────────────────┘
+```
+
+### Multiple Submissions Path (Tree-Based)
+```
+┌─────────────────────┐
+│  Criteria Config    │
+│  (JSON/Dict)        │
+└──────────┬──────────┘
+           │
+           ▼
+┌─────────────────────┐
+│  BuildTreeStep      │
+│  (Build Once)       │
+└──────────┬──────────┘
+           │
+           ▼
+┌─────────────────────┐
+│  CriteriaTree       │
+│  (Reusable)         │
+└──────────┬──────────┘
+           │
+           ▼
+    ┌──────┴──────┐
+    │  For Each   │
+    │ Submission  │
+    └──────┬──────┘
+           │
+           ▼
+┌─────────────────────┐
+│    GradeStep        │
+│  grade_from_tree()  │
+└──────────┬──────────┘
+           │
+           ▼
+┌─────────────────────┐
+│   ResultTree        │
+│  (Per Submission)   │
+└─────────────────────┘
+```
+
+## Step Implementations
+
+### BuildTreeStep
+
+**Responsibility**: Construct hierarchical criteria tree from configuration
+
+**Input**: 
+- Criteria configuration (dict)
+- Template instance
+- Submission files (for validation)
+
+**Output**: 
+- `CriteriaTree` object (fully built with test functions resolved)
+
+**When Executed**: 
+- Only when `len(submissions) > 1`
+
+**Key Features**:
+- Validates JSON schema using Pydantic models
+- Resolves test functions from template library
+- Stores test parameters and file references
+- Builds complete tree structure with weights
+
+### GradeStep
+
+**Responsibility**: Execute grading and produce result tree
+
+**Input Detection Logic**:
+```python
+if isinstance(input_data, CriteriaTree):
+    # Use tree-based grading
+    result = grader_service.grade_from_tree(criteria_tree, submission)
+else:
+    # Use config-based grading
+    result = grader_service.grade_from_config(criteria_config, template, submission)
+```
+
+**Two Grading Methods**:
+
+1. **`grade_from_config()`** - Single submission optimization
+   - Directly processes criteria configuration
+   - Builds result tree while executing tests
+   - Single-pass algorithm (no tree pre-construction)
+
+2. **`grade_from_tree()`** - Multiple submission efficiency
+   - Traverses pre-built criteria tree
+   - Executes tests from tree nodes
+   - Builds result tree from criteria tree structure
+
+**Output**: 
+- `ResultTree` object with scores and feedback
+
+## Pipeline Configuration Logic
+
+### Automatic Path Selection
+
+```python
+def configure_pipeline(submissions: List[Submission], criteria_config: dict):
+    """
+    Automatically configures pipeline based on submission count.
+    """
+    if len(submissions) == 1:
+        # Single submission: Skip tree building
+        return [
+            PreFlightStep(),
+            LoadTemplateStep(),
+            GradeStep(),  # Uses grade_from_config
+            FeedbackStep(),
+            ExportStep()
+        ]
+    else:
+        # Multiple submissions: Build tree once, reuse
+        return [
+            PreFlightStep(),
+            LoadTemplateStep(),
+            BuildTreeStep(),  # Build criteria tree
+            GradeStep(),  # Uses grade_from_tree
+            FeedbackStep(),
+            ExportStep()
+        ]
+```
+
+## Data Flow Example
+
+### Single Submission Example
+
+**Input**:
+```json
+{
+  "criteria": {
+    "name": "HTML Assignment",
+    "tests": [
+      {"name": "check_title", "weight": 50},
+      {"name": "check_header", "weight": 50}
+    ]
+  },
+  "submissions": [
+    {"files": ["index.html"]}
+  ]
+}
+```
+
+**Flow**:
+1. Criteria config loaded as dict
+2. GradeStep detects dict input
+3. Calls `grade_from_config(criteria, template, submission)`
+4. Executes tests and builds result tree simultaneously
+5. Returns final result tree
+
+### Multiple Submissions Example
+
+**Input**:
+```json
+{
+  "criteria": { /* same as above */ },
+  "submissions": [
+    {"files": ["index.html"]},
+    {"files": ["index.html"]},
+    {"files": ["index.html"]}
+  ]
+}
+```
+
+**Flow**:
+1. Criteria config loaded as dict
+2. BuildTreeStep creates `CriteriaTree` (once)
+3. For each submission:
+   - GradeStep detects `CriteriaTree` input
+   - Calls `grade_from_tree(tree, submission)`
+   - Executes tests from tree
+   - Returns result tree for that submission
+4. Collects all result trees
+
+## Performance Implications
+
+### Single Submission
+- **Avoided Overhead**: No tree construction/traversal
+- **Memory**: Lower (no tree object created)
+- **Speed**: Faster for single grading
+- **Complexity**: O(n) where n = number of tests
+
+### Multiple Submissions
+- **Tree Construction**: One-time cost
+- **Per-Submission**: Fast traversal (reuse structure)
+- **Memory**: Higher (tree persists)
+- **Speed**: Faster overall for batch processing
+- **Complexity**: O(t + n*m) where t = tree building, n = submissions, m = tests
+
+## Error Handling
+
+### BuildTreeStep Errors
+- Missing test functions in template
+- Invalid JSON schema
+- Malformed criteria structure
+- **Result**: Pipeline fails early (before grading)
+
+### GradeStep Errors
+- Test execution failures
+- File access issues
+- Runtime errors in test functions
+- **Result**: Captured in ResultTree as test failures
+
+## Type Safety
+
+The GradeStep uses robust type checking to determine the grading method:
+
+```python
+from autograder.models.criteria_tree import CriteriaTree
+
+# Type checking
+if isinstance(input_data, CriteriaTree):
+    # Definitely a tree
+    use_grade_from_tree()
+elif isinstance(input_data, dict):
+    # Configuration dictionary
+    use_grade_from_config()
+else:
+    # Error: unexpected input type
+    raise TypeError("Invalid input type for GradeStep")
+```
+
+## Benefits of This Architecture
+
+### 1. Performance Optimization
+- Single submissions: No unnecessary tree overhead
+- Multiple submissions: Efficient tree reuse
+
+### 2. Flexibility
+- Same pipeline handles both scenarios
+- Automatic path selection based on input
+
+### 3. Maintainability
+- Clear separation of concerns
+- Each step has single responsibility
+- Easy to modify or extend
+
+### 4. Consistency
+- Both paths produce identical `ResultTree` output
+- Same scoring algorithm regardless of path
+- Unified error handling
+
+### 5. Testability
+- Each grading method can be tested independently
+- Clear input/output contracts
+- Easier to debug issues
+
+## Migration from Old Architecture
+
+### Old Approach Problems
+- Pre-executed trees (confusing concept)
+- AI Executor as lazy-loading proxy (complex)
+- Multiple traversals (inefficient)
+- Mixed responsibilities
+
+### New Approach Solutions
+- ✅ Single clear tree type: `CriteriaTree`
+- ✅ Result tree built during grading
+- ✅ Optional tree building (conditional)
+- ✅ Clear step responsibilities
+- ✅ Batch optimization handled separately
+
+## Future Enhancements
+
+### Potential Optimizations
+1. **Parallel Execution**: Grade multiple submissions in parallel
+2. **Caching**: Cache template loading across requests
+3. **Streaming**: Stream results as they complete
+4. **Incremental Results**: Return partial results for long-running grades
+
+### AI Executor Integration
+For AI-based tests (e.g., essay grading):
+- Collect all AI tests during tree traversal
+- Batch API calls (single request)
+- Map results back to result tree nodes
+- Minimize API latency impact
+
+## Conclusion
+
+The pipeline's conditional tree-building logic provides an optimal balance between simplicity and performance. By detecting submission count and automatically choosing the appropriate path, we achieve:
+
+- **Fast single-submission grading** (no tree overhead)
+- **Efficient batch processing** (tree reuse)
+- **Clean architecture** (clear separation)
+- **Type-safe execution** (runtime validation)
+
+This design sets a solid foundation for scaling the autograder system while maintaining code clarity and performance.
+

From 8fcc9e444fb8164074838657e9666f9966ed7be9 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sun, 28 Dec 2025 23:14:39 -0300
Subject: [PATCH 08/49] feat: refactor import paths for models in various
 modules

---
 autograder/models/abstract/test_function.py         | 4 ++--
 autograder/models/dataclass/feedback_preferences.py | 1 -
 autograder/services/template_library_service.py     | 2 +-
 autograder/template_library/api_testing.py          | 6 +++---
 autograder/template_library/essay_grader.py         | 6 +++---
 autograder/template_library/input_output.py         | 6 +++---
 autograder/template_library/web_dev.py              | 6 +++---
 tests/autograder/__init__.py                        | 0
 8 files changed, 15 insertions(+), 16 deletions(-)
 delete mode 100644 tests/autograder/__init__.py

diff --git a/autograder/models/abstract/test_function.py b/autograder/models/abstract/test_function.py
index 1a15167..31c6503 100644
--- a/autograder/models/abstract/test_function.py
+++ b/autograder/models/abstract/test_function.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Dict, List, Optional
-from autograder.builder.models.criteria_tree import TestResult
-from autograder.builder.models.param_description import ParamDescription
+from autograder.models.dataclass.test_result import TestResult
+from autograder.models.dataclass.param_description import ParamDescription
 
 
 class TestFunction(ABC):
diff --git a/autograder/models/dataclass/feedback_preferences.py b/autograder/models/dataclass/feedback_preferences.py
index 38f7719..0f8fcaa 100644
--- a/autograder/models/dataclass/feedback_preferences.py
+++ b/autograder/models/dataclass/feedback_preferences.py
@@ -1,6 +1,5 @@
 from typing import List, Dict
 from dataclasses import dataclass, field
-from autograder.context import request_context
 
 
 @dataclass
diff --git a/autograder/services/template_library_service.py b/autograder/services/template_library_service.py
index 74ebb39..d7ac00a 100644
--- a/autograder/services/template_library_service.py
+++ b/autograder/services/template_library_service.py
@@ -1,6 +1,6 @@
 import importlib.util
 import inspect
-from autograder.builder.models.template import Template
+from autograder.models.abstract.template import Template
 
 
 class TemplateLibraryService:
diff --git a/autograder/template_library/api_testing.py b/autograder/template_library/api_testing.py
index cf62d5b..76074ee 100644
--- a/autograder/template_library/api_testing.py
+++ b/autograder/template_library/api_testing.py
@@ -2,9 +2,9 @@
 import requests
 import json
 import logging
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.builder.models.param_description import ParamDescription
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.param_description import ParamDescription
 from autograder.models.dataclass.test_result import TestResult
 from autograder.utils.executors.sandbox_executor import SandboxExecutor
 
diff --git a/autograder/template_library/essay_grader.py b/autograder/template_library/essay_grader.py
index 5dd61b0..5d0324b 100644
--- a/autograder/template_library/essay_grader.py
+++ b/autograder/template_library/essay_grader.py
@@ -1,7 +1,7 @@
 from autograder.utils.executors.ai_executor import ai_executor
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.builder.models.param_description import ParamDescription
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.param_description import ParamDescription
 from autograder.models.dataclass.test_result import TestResult
 
 # ===============================================================
diff --git a/autograder/template_library/input_output.py b/autograder/template_library/input_output.py
index f9ec96e..6d8783d 100644
--- a/autograder/template_library/input_output.py
+++ b/autograder/template_library/input_output.py
@@ -1,6 +1,6 @@
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.builder.models.param_description import ParamDescription
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.param_description import ParamDescription
 from autograder.models.dataclass.test_result import TestResult
 from autograder.utils.executors.sandbox_executor import SandboxExecutor
 
diff --git a/autograder/template_library/web_dev.py b/autograder/template_library/web_dev.py
index 8cd2073..8d88e06 100644
--- a/autograder/template_library/web_dev.py
+++ b/autograder/template_library/web_dev.py
@@ -3,9 +3,9 @@
 
 from bs4 import BeautifulSoup
 
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.builder.models.param_description import ParamDescription
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.param_description import ParamDescription
 
 from autograder.models.dataclass.test_result import TestResult
 
diff --git a/tests/autograder/__init__.py b/tests/autograder/__init__.py
deleted file mode 100644
index e69de29..0000000

From 1cc7b016dc37c04efc4440a8dede498336172c0a Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sun, 28 Dec 2025 23:14:56 -0300
Subject: [PATCH 09/49] feat: add Pydantic models for criteria configuration
 validation

---
 .../models/dataclass/criteria_config.py       | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 autograder/models/dataclass/criteria_config.py

diff --git a/autograder/models/dataclass/criteria_config.py b/autograder/models/dataclass/criteria_config.py
new file mode 100644
index 0000000..94cabe1
--- /dev/null
+++ b/autograder/models/dataclass/criteria_config.py
@@ -0,0 +1,103 @@
+"""
+Pydantic models for validating criteria configuration JSON structure.
+
+New schema structure:
+- Subjects are arrays with 'subject_name' field
+- Parameters are named objects: [{"name": "param", "value": "val"}, ...]
+- Tests contain parameters directly (no 'calls' array)
+- Root config has optional 'test_library' field
+"""
+from pydantic import BaseModel, Field, field_validator
+from typing import List, Dict, Any, Optional, Union
+
+
+class ParameterConfig(BaseModel):
+    """Named parameter for a test function."""
+    name: str = Field(..., description="Parameter name")
+    value: Any = Field(..., description="Parameter value")
+
+    model_config = {"extra": "forbid"}
+
+
+class TestConfig(BaseModel):
+    """Configuration for a single test execution."""
+    name: str = Field(..., description="Name of the test function in the template")
+    file: Optional[str] = Field(None, description="Target file for the test (if applicable)")
+    parameters: Optional[List[ParameterConfig]] = Field(
+        default_factory=list,
+        description="Named parameters for the test function"
+    )
+
+    model_config = {"extra": "forbid"}
+
+    def get_args_list(self) -> List[Any]:
+        """Convert named parameters to positional arguments list."""
+        if not self.parameters:
+            return []
+        return [param.value for param in self.parameters]
+
+    def get_kwargs_dict(self) -> Dict[str, Any]:
+        """Convert named parameters to keyword arguments dictionary."""
+        if not self.parameters:
+            return {}
+        return {param.name: param.value for param in self.parameters}
+
+
+class SubjectConfig(BaseModel):
+    """Configuration for a subject node (can contain tests or nested subjects)."""
+    subject_name: str = Field(..., description="Name of the subject")
+    weight: float = Field(..., ge=0, le=100, description="Weight of this subject (0-100)")
+    tests: Optional[List[TestConfig]] = Field(None, description="Tests under this subject")
+    subjects: Optional[List['SubjectConfig']] = Field(None, description="Nested subjects")
+
+    model_config = {"extra": "forbid"}
+
+    def model_post_init(self, __context):
+        """Validate that subject has either tests or subjects, but not both or neither."""
+        has_tests = self.tests is not None and len(self.tests) > 0
+        has_subjects = self.subjects is not None and len(self.subjects) > 0
+
+        if has_tests and has_subjects:
+            raise ValueError(f"Subject '{self.subject_name}' cannot have both 'tests' and 'subjects'. Choose one.")
+        if not has_tests and not has_subjects:
+            raise ValueError(f"Subject '{self.subject_name}' must have either 'tests' or 'subjects'.")
+
+
+class CategoryConfig(BaseModel):
+    """Configuration for a category (base, bonus, or penalty)."""
+    weight: float = Field(..., ge=0, le=100, description="Weight of this category (0-100)")
+    subjects: Optional[List[SubjectConfig]] = Field(None, description="Subjects under this category (array)")
+    tests: Optional[List[TestConfig]] = Field(None, description="Tests directly under category")
+
+    model_config = {"extra": "forbid"}
+
+    def model_post_init(self, __context):
+        """Validate that category has either tests or subjects."""
+        has_tests = self.tests is not None and len(self.tests) > 0
+        has_subjects = self.subjects is not None and len(self.subjects) > 0
+
+        if has_tests and has_subjects:
+            raise ValueError("Category cannot have both 'tests' and 'subjects'. Choose one.")
+        if not has_tests and not has_subjects:
+            raise ValueError("Category must have either 'tests' or 'subjects'.")
+
+
+class CriteriaConfig(BaseModel):
+    """Root configuration for grading criteria."""
+    test_library: Optional[str] = Field(None, description="Name of the test library/template to use")
+    base: CategoryConfig = Field(..., description="Base grading criteria (required)")
+    bonus: Optional[CategoryConfig] = Field(None, description="Bonus points criteria")
+    penalty: Optional[CategoryConfig] = Field(None, description="Penalty criteria")
+
+    model_config = {"extra": "forbid"}
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'CriteriaConfig':
+        """Create and validate criteria config from dictionary."""
+        return cls.model_validate(data)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> 'CriteriaConfig':
+        """Create and validate criteria config from JSON string."""
+        return cls.model_validate_json(json_str)
+

From 04244c7e31f095b61b4b2edd6804f427260a3f05 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sun, 28 Dec 2025 23:15:47 -0300
Subject: [PATCH 10/49] feat: update criteria tree models with embedded test
 functions

---
 autograder/autograder.py                      |  55 +-
 autograder/models/criteria_tree.py            | 328 +++++-----
 autograder/models/result_tree.py              | 391 ++++++++++++
 autograder/services/criteria_tree_service.py  | 579 ++++++++++--------
 autograder/services/grader_service.py         | 570 +++++++++++------
 autograder/steps/build_tree_step.py           |  53 +-
 autograder/steps/grade_step.py                | 103 +++-
 tests/data/custom_template/custom_template.py |   2 +-
 tests/test_pipeline_modes.py                  | 252 ++++++++
 tests/unit/test_pipeline_steps.py             | 331 ++++++++++
 10 files changed, 2016 insertions(+), 648 deletions(-)
 create mode 100644 autograder/models/result_tree.py
 create mode 100644 tests/test_pipeline_modes.py
 create mode 100644 tests/unit/test_pipeline_steps.py

diff --git a/autograder/autograder.py b/autograder/autograder.py
index a082d0d..e399ef4 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -16,20 +16,65 @@ def build_pipeline(
                  feedback_config,
                  setup_config = None,
                  custom_template = None,
-                 feedback_mode = None):
+                 feedback_mode = None,
+                 submission_files = None,
+                 submission_id = None,
+                 is_multi_submission = False):
+    """
+    Build an autograder pipeline based on configuration.
 
+    Args:
+        template_name: Name of the template to use
+        include_feedback: Whether to include feedback generation
+        grading_criteria: Criteria configuration dictionary
+        feedback_config: Configuration for feedback generation
+        setup_config: Pre-flight setup configuration
+        custom_template: Custom template object (if any)
+        feedback_mode: Mode for feedback generation
+        submission_files: Student submission files
+        submission_id: Optional submission identifier
+        is_multi_submission: Whether grading multiple submissions (requires tree building)
+
+    Returns:
+        Configured AutograderPipeline
+    """
     pipeline = AutograderPipeline()
+
+    # Pre-flight checks (if configured)
     if setup_config:
         pipeline.add_step(PreFlightStep(setup_config))
-    pipeline.add_step(TemplateLoaderStep(template_name,custom_template))
-    pipeline.add_step(BuildTreeStep(grading_criteria))
-    pipeline.add_step(GradeStep())
+
+    # Load template
+    pipeline.add_step(TemplateLoaderStep(template_name, custom_template))
+
+    # Conditional tree building and grading based on submission count
+    if is_multi_submission:
+        # Multi-submission mode: Build tree once, then grade
+        pipeline.add_step(BuildTreeStep(grading_criteria))
+        pipeline.add_step(GradeStep(
+            submission_files=submission_files,
+            submission_id=submission_id
+        ))
+    else:
+        # Single submission mode: Grade directly from config (one-pass)
+        pipeline.add_step(GradeStep(
+            criteria_json=grading_criteria,
+            submission_files=submission_files,
+            submission_id=submission_id
+        ))
+
+    # Feedback generation (if configured)
     if include_feedback:
         reporter_service = ReporterFactory.create_reporter_for(feedback_mode)
-        pipeline.add_step(FeedbackStep(reporter_service,feedback_config))
+        pipeline.add_step(FeedbackStep(reporter_service, feedback_config))
+
+    # Export results
     pipeline.add_step(ExporterStep(UpstashDriver))
+
     return pipeline
 
 
 
 
+
+
diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py
index af4ec4e..b740448 100644
--- a/autograder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -1,222 +1,178 @@
-from typing import List, Any
-from autograder.models.dataclass.test_result import TestResult
+"""
+Updated Criteria Tree models with embedded test functions.
 
+These models represent the grading criteria structure with test functions
+embedded during tree building (no more lazy loading or pre-execution).
+"""
+from typing import List, Optional, Any
+from dataclasses import dataclass, field
 
-# Assuming TestResult is defined in a separate, importable file
-# from autograder.core.models.test_result import TestResult
 
-# ===============================================================
-# 1. Classes for Test Execution
-# ===============================================================
-class TestCall:
-    """Represents a single invocation of a test function with its arguments."""
-    def __init__(self, args: List[Any]):
-        self.args = args
+@dataclass
+class TestNode:
+    """
+    Leaf node representing a single test execution configuration.
+
+    Contains:
+    - Test function reference (from template)
+    - Parameters for execution
+    - File target (if applicable)
+    - Category and subject context
+    """
+    name: str
+    test_name: str
+    test_function: Any  # TestFunction instance from template
+    parameters: List[Any] = field(default_factory=list)
+    file_target: Optional[str] = None
+    category_name: str = ""
+    subject_name: str = ""
+    weight: float = 100.0
 
     def __repr__(self):
-        return f"TestCall(args={self.args})"
+        params_str = f", params={self.parameters}" if self.parameters else ""
+        file_str = f", file={self.file_target}" if self.file_target else ""
+        return f"TestNode({self.test_name}{params_str}{file_str})"
 
-# ===============================================================
-# 2. Classes for the Tree Structure
-# ===============================================================
 
-class Test:
+@dataclass
+class SubjectNode:
     """
-    Represents a group of calls to a single test function in the library.
-    This is a LEAF node in the grading tree.
+    Branch node representing a subject/topic in the grading criteria.
+
+    Can contain either:
+    - Nested subjects (recursive structure)
+    - Test nodes (leaf level)
     """
-    def __init__(self, name: str, filename: str = None):
-        self.name = name
-        self.file = filename  # The file this test operates on (e.g., "index.html")
-        self.calls: List[TestCall] = []
-
-    def add_call(self, call: TestCall):
-        self.calls.append(call)
-
-    def get_result(self, test_library, submission_files, subject_name: str) -> List[TestResult]:
-        """
-        Retrieves a TestFunction object from the library and executes it for each TestCall.
-        """
-        try:
-            # Get the TestFunction instance (e.g., HasTag()) from the library
-            test_function_instance = test_library.get_test(self.name)
-        except AttributeError as e:
-            return [TestResult(self.name, 0, f"ERROR: {e}", subject_name)]
-
-        file_content_to_pass = None
-        if self.file:
-            # --- File Injection Logic ---
-            if self.file == "all":
-                file_content_to_pass = submission_files
-            else:
-                file_content_to_pass = submission_files.get(self.file)
-                if file_content_to_pass is None:
-                    return [TestResult(self.name, 0, f"Erro: O arquivo necessário '{self.file}' não foi encontrado na submissão.", subject_name)]
-
-        # --- Execution Logic ---
-        if not self.calls:
-            # Execute with just the file content if no specific calls are defined
-            if file_content_to_pass:
-                result = test_function_instance.execute(file_content_to_pass)
-            else:
-                result = test_function_instance.execute()
-            result.subject_name = subject_name
-            return [result]
-
-        results = []
-        for call in self.calls:
-            # Execute the 'execute' method of the TestFunction instance
-            if file_content_to_pass:
-                result = test_function_instance.execute(file_content_to_pass, *call.args)
-            else:
-                result = test_function_instance.execute(*call.args)
-            result.subject_name = subject_name
-            results.append(result)
-        return results
+    name: str
+    weight: float
+    subjects: List['SubjectNode'] = field(default_factory=list)
+    tests: List[TestNode] = field(default_factory=list)
 
     def __repr__(self):
-        return f"Test(name='{self.name}', file='{self.file}', calls={len(self.calls)})"
+        if self.subjects:
+            return f"SubjectNode({self.name}, weight={self.weight}, subjects={len(self.subjects)})"
+        return f"SubjectNode({self.name}, weight={self.weight}, tests={len(self.tests)})"
 
-class Subject:
-    """
-    Represents a subject, which can contain EITHER a list of tests OR
-    a dictionary of nested subjects. This is a BRANCH or LEAF-HOLDER node.
-    """
-    def __init__(self, name, weight=0):
-        self.name = name
-        self.weight = weight
-        self.tests: List[Test] | None = None
-        self.subjects: dict[str, 'Subject'] | None = None
+    def get_all_tests(self) -> List[TestNode]:
+        """Recursively collect all test nodes under this subject."""
+        tests = []
 
-    def __repr__(self):
-        if self.subjects is not None:
-            return f"Subject(name='{self.name}', weight={self.weight}, subjects={len(self.subjects)})"
-        return f"Subject(name='{self.name}', weight={self.weight}, tests={self.tests})"
+        if self.tests:
+            tests.extend(self.tests)
+
+        if self.subjects:
+            for subject in self.subjects:
+                tests.extend(subject.get_all_tests())
 
+        return tests
 
-class TestCategory:
+
+@dataclass
+class CategoryNode:
     """
-    Represents one of the three main categories: base, bonus, or penalty.
-    Can contain EITHER a list of tests OR a dictionary of subjects (not both).
+    Top-level category node (base, bonus, or penalty).
+
+    Can contain either:
+    - Subjects (organized hierarchy)
+    - Tests (flat structure)
     """
-    def __init__(self, name, max_score=100):
-        self.name = name
-        self.max_score = max_score
-        self.subjects: dict[str, Subject] | None = None
-        self.tests: List[Test] | None = None
+    name: str
+    weight: float
+    subjects: List[SubjectNode] = field(default_factory=list)
+    tests: List[TestNode] = field(default_factory=list)
 
-    def set_weight(self, weight):
-        self.max_score = weight
+    def __repr__(self):
+        if self.subjects:
+            return f"CategoryNode({self.name}, weight={self.weight}, subjects={len(self.subjects)})"
+        return f"CategoryNode({self.name}, weight={self.weight}, tests={len(self.tests)})"
 
-    def add_subject(self, subject: Subject):
-        if self.subjects is None:
-            self.subjects = {}
-        self.subjects[subject.name] = subject
+    def get_all_tests(self) -> List[TestNode]:
+        """Recursively collect all test nodes under this category."""
+        tests = []
 
-    def __repr__(self):
-        if self.tests is not None:
-            return f"TestCategory(name='{self.name}', max_score={self.max_score}, tests={len(self.tests)})"
-        return f"TestCategory(name='{self.name}', max_score={self.max_score}, subjects={list(self.subjects.keys()) if self.subjects else []})"
+        if self.tests:
+            tests.extend(self.tests)
 
+        if self.subjects:
+            for subject in self.subjects:
+                tests.extend(subject.get_all_tests())
 
+        return tests
+
+
+@dataclass
 class CriteriaTree:
-    """The ROOT of the criteria tree."""
-    def __init__(self, bonus_weight=0, penalty_weight=0):
-        self.base = TestCategory("base")
-        self.bonus = TestCategory("bonus", max_score=bonus_weight)
-        self.penalty = TestCategory("penalty", max_score=penalty_weight)
+    """
+    Root of the criteria tree structure.
+
+    Contains three main categories:
+    - base: Required grading criteria
+    - bonus: Optional bonus points
+    - penalty: Optional penalty points
+    """
+    base: Optional[CategoryNode] = None
+    bonus: Optional[CategoryNode] = None
+    penalty: Optional[CategoryNode] = None
 
     def __repr__(self):
-        return f"Criteria(categories=['base', 'bonus', 'penalty'])"
+        categories = []
+        if self.base:
+            categories.append("base")
+        if self.bonus:
+            categories.append("bonus")
+        if self.penalty:
+            categories.append("penalty")
+        return f"CriteriaTree(categories={categories})"
+
+    def get_all_tests(self) -> List[TestNode]:
+        """Get all test nodes from the entire tree."""
+        tests = []
+
+        if self.base:
+            tests.extend(self.base.get_all_tests())
+        if self.bonus:
+            tests.extend(self.bonus.get_all_tests())
+        if self.penalty:
+            tests.extend(self.penalty.get_all_tests())
+
+        return tests
 
     def print_tree(self):
-        """Prints a visual representation of the entire criteria tree."""
-        print(f"🌲 Criteria Tree")
-        self._print_category(self.base, prefix="  ")
-        self._print_category(self.bonus, prefix="  ")
-        self._print_category(self.penalty, prefix="  ")
-
-    def _print_category(self, category: TestCategory, prefix: str):
-        """Helper method to print a category and its subjects or tests."""
-        if not category.subjects and not category.tests:
-            return
-        print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})")
-        
-        if category.subjects:
-            for subject in category.subjects.values():
-                self._print_subject(subject, prefix=prefix + "    ")
-        
-        if category.tests:
-            for test in category.tests:
-                print(f"{prefix}    - 🧪 {test.name} (file: {test.file})")
-                for call in test.calls:
-                    print(f"{prefix}      - Parameters: {call.args}")
+        """Print a visual representation of the criteria tree."""
+        print("🌲 Criteria Tree")
 
-    def _print_subject(self, subject: Subject, prefix: str):
-        """Recursive helper method to print a subject and its contents."""
-        print(f"{prefix}📘 {subject.name} (weight: {subject.weight})")
+        if self.base:
+            self._print_category(self.base, "  ")
+        if self.bonus:
+            self._print_category(self.bonus, "  ")
+        if self.penalty:
+            self._print_category(self.penalty, "  ")
 
-        if subject.subjects is not None:
-            for sub in subject.subjects.values():
-                self._print_subject(sub, prefix=prefix + "    ")
+    def _print_category(self, category: CategoryNode, prefix: str):
+        """Print a category and its contents."""
+        print(f"{prefix}📁 {category.name.upper()} (weight: {category.weight})")
 
-        if subject.tests is not None:
-            for test in subject.tests:
-                print(f"{prefix}  - 🧪 {test.name} (file: {test.file})")
-                for call in test.calls:
-                    print(f"{prefix}    - Parameters: {call.args}")
-
-    def print_pre_executed_tree(self):
-        """Prints a visual representation of the entire pre-executed criteria tree."""
-        print(f"🌲 Pre-Executed Criteria Tree")
-        self._print_pre_executed_category(self.base, prefix="  ")
-        self._print_pre_executed_category(self.bonus, prefix="  ")
-        self._print_pre_executed_category(self.penalty, prefix="  ")
-
-    def _print_pre_executed_category(self, category: TestCategory, prefix: str):
-        """Helper method to print a category and its pre-executed subjects or tests."""
-        if not category.subjects and not category.tests:
-            return
-        print(f"{prefix}📁 {category.name.upper()} (max_score: {category.max_score})")
-        
         if category.subjects:
-            for subject in category.subjects.values():
-                self._print_pre_executed_subject(subject, prefix=prefix + "    ")
-        
-        if category.tests:
-            # In a pre-executed tree, category.tests contains TestResult objects
-            for result in category.tests:
-                if isinstance(result, TestResult):
-                    params_str = f" (Parameters: {result.parameters})" if result.parameters else ""
-                    print(f"{prefix}    - 📝 {result.test_name}{params_str} -> Score: {result.score}")
-                else:
-                    print(f"{prefix}    - ? Unexpected item in tests list: {result}")
-
-    def _print_pre_executed_subject(self, subject: Subject, prefix: str):
-        """Recursive helper method to print a subject and its pre-executed test results."""
-        print(f"{prefix}📘 {subject.name} (weight: {subject.weight})")
-
-        if subject.subjects is not None:
-            for sub in subject.subjects.values():
-                self._print_pre_executed_subject(sub, prefix=prefix + "    ")
+            for subject in category.subjects:
+                self._print_subject(subject, prefix + "    ")
 
-        if subject.tests is not None:
-            # In a pre-executed tree, subject.tests contains TestResult objects
-
-            # In the regular tree, subject.tests contains "Test" objects
-            for result in subject.tests:
-                if isinstance(result, TestResult):
-                    params_str = f" (Parameters: {result.parameters})" if result.parameters else ""
-                    print(f"{prefix}  - 📝 {result.test_name}{params_str} -> Score: {result.score}")
+        if category.tests:
+            for test in category.tests:
+                params = f"({test.parameters})" if test.parameters else "()"
+                file_info = f" [file: {test.file_target}]" if test.file_target else ""
+                print(f"{prefix}    🧪 {test.test_name}{params}{file_info}")
 
-                elif isinstance(result, Test):
-                    print(f"{prefix} - 🧪 {result.name} (file: {result.file})")
-                    """Added the symbol identificator to match the previous formatting"""
-                    for call in result.calls:
-                        print(f"{prefix}    - Parameters: {call.args}")
-                else:
-                    # Fallback for unexpected types
-                    print(f"{prefix}  - ? Unexpected item in tests list: {result}")
+    def _print_subject(self, subject: SubjectNode, prefix: str):
+        """Recursively print a subject and its contents."""
+        print(f"{prefix}📘 {subject.name} (weight: {subject.weight})")
 
+        if subject.subjects:
+            for child in subject.subjects:
+                self._print_subject(child, prefix + "    ")
 
+        if subject.tests:
+            for test in subject.tests:
+                params = f"({test.parameters})" if test.parameters else "()"
+                file_info = f" [file: {test.file_target}]" if test.file_target else ""
+                print(f"{prefix}    🧪 {test.test_name}{params}{file_info}")
 
diff --git a/autograder/models/result_tree.py b/autograder/models/result_tree.py
new file mode 100644
index 0000000..92d3584
--- /dev/null
+++ b/autograder/models/result_tree.py
@@ -0,0 +1,391 @@
+"""
+Models for the Result Tree - represents executed grading results.
+The result tree mirrors the criteria structure but contains actual execution results.
+"""
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, Any
+from enum import Enum
+
+
+class NodeType(Enum):
+    """Types of nodes in the result tree."""
+    CATEGORY = "category"
+    SUBJECT = "subject"
+    TEST = "test"
+
+
+@dataclass
+class ResultNode:
+    """
+    Base node for the result tree.
+
+    Represents a grading category or subject with a calculated score
+    based on its children's scores and weights.
+    """
+    name: str
+    node_type: NodeType
+    weight: float
+    score: float = 0.0
+    max_possible: float = 100.0
+    children: List['ResultNode'] = field(default_factory=list)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def calculate_score(self) -> float:
+        """
+        Calculate this node's score based on children.
+        For leaf nodes (tests), score is already set.
+        For parent nodes, calculate weighted average of children.
+
+        Special case: ROOT node with BASE/BONUS/PENALTY uses additive scoring:
+        - Base score (0-100)
+        - Bonus adds points (bonus_score * bonus_weight / 100)
+        - Penalty subtracts points (penalty_score * penalty_weight / 100)
+        """
+        if self.node_type == NodeType.TEST:
+            # Leaf node - score already set from test execution
+            return self.score
+
+        if not self.children:
+            return 0.0
+
+        # Calculate children scores first
+        for child in self.children:
+            child.calculate_score()
+
+        # Check if this is a ROOT node with BASE/BONUS/PENALTY categories
+        child_names = {c.name.lower() for c in self.children}
+        is_root_with_categories = (
+            self.name.lower() == "root" and
+            "base" in child_names
+        )
+
+        if is_root_with_categories:
+            # Additive scoring for BASE/BONUS/PENALTY
+            base_score = 0.0
+            bonus_points = 0.0
+            penalty_points = 0.0
+
+            for child in self.children:
+                child_name = child.name.lower()
+                if child_name == "base":
+                    base_score = child.score
+                elif child_name == "bonus":
+                    # Bonus adds: (bonus_score / 100) * bonus_weight
+                    bonus_points = (child.score / 100.0) * child.weight
+                elif child_name == "penalty":
+                    # Penalty subtracts: (penalty_score / 100) * penalty_weight
+                    penalty_points = (child.score / 100.0) * child.weight
+
+            # Final score = base + bonus - penalty (capped at 0-100)
+            self.score = max(0.0, min(100.0, base_score + bonus_points - penalty_points))
+        else:
+            # Standard weighted average for other nodes
+            total_weight = sum(c.weight for c in self.children)
+            if total_weight == 0:
+                return 0.0
+
+            weighted_sum = sum(c.score * c.weight for c in self.children)
+            self.score = weighted_sum / total_weight
+
+        return self.score
+
+    def to_dict(self) -> dict:
+        """Convert result node to dictionary representation."""
+        return {
+            "name": self.name,
+            "type": self.node_type.value,
+            "weight": self.weight,
+            "score": round(self.score, 2),
+            "max_possible": self.max_possible,
+            "children": [child.to_dict() for child in self.children],
+            "metadata": self.metadata
+        }
+
+
+@dataclass
+class TestResultNode(ResultNode):
+    """
+    Leaf node representing a single test execution.
+
+    Contains the actual test result and execution details.
+    """
+    test_name: str = ""
+    test_function: Any = None  # Reference to the actual test function
+    test_params: List[Any] = field(default_factory=list)
+    file_target: Optional[str] = None
+    execution_result: Optional[Any] = None  # TestResult object after execution
+    error_message: Optional[str] = None
+    passed: bool = False
+
+    def __post_init__(self):
+        """Set node type to TEST."""
+        self.node_type = NodeType.TEST
+
+    def execute(self, submission_files: Dict[str, Any]) -> float:
+        """
+        Execute the test function with provided parameters.
+        Updates score, passed status, and execution_result.
+
+        Returns:
+            The test score (0-100)
+        """
+        if not self.test_function:
+            self.error_message = "No test function assigned"
+            self.score = 0.0
+            self.passed = False
+            return 0.0
+
+        try:
+            # Execute the test function
+            # The test function should return a TestResult object
+            self.execution_result = self.test_function.execute(
+                *self.test_params,
+                files=submission_files
+            )
+
+            # Extract score from result
+            if hasattr(self.execution_result, 'score'):
+                self.score = float(self.execution_result.score)
+            else:
+                self.score = 100.0 if self.execution_result else 0.0
+
+            # Check if test passed (score >= 50 is considered passing)
+            self.passed = self.score >= 50
+
+            # Store result report/message
+            if hasattr(self.execution_result, 'report'):
+                self.metadata['report'] = self.execution_result.report
+            elif hasattr(self.execution_result, 'message'):
+                self.metadata['message'] = self.execution_result.message
+
+            return self.score
+
+        except Exception as e:
+            self.error_message = f"Test execution failed: {str(e)}"
+            self.score = 0.0
+            self.passed = False
+            self.metadata['error'] = str(e)
+            return 0.0
+
+    def to_dict(self) -> dict:
+        """Convert test result node to dictionary with execution details."""
+        base_dict = super().to_dict()
+        base_dict.update({
+            "test_name": self.test_name,
+            "file_target": self.file_target,
+            "passed": self.passed,
+            "error_message": self.error_message,
+            "params": self.test_params
+        })
+        return base_dict
+
+
+@dataclass
+class ResultTree:
+    """
+    Complete result tree for a grading session.
+
+    Contains the root node and provides methods for score calculation
+    and tree traversal.
+    """
+    root: ResultNode
+    submission_id: Optional[str] = None
+    template_name: Optional[str] = None
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def calculate_final_score(self) -> float:
+        """
+        Calculate and return the final score by traversing the tree.
+
+        Returns:
+            Final score (0-100)
+        """
+        return self.root.calculate_score()
+
+    def get_all_test_results(self) -> List[TestResultNode]:
+        """Get all test result nodes from the tree."""
+        results = []
+        self._collect_tests(self.root, results)
+        return results
+
+    def _collect_tests(self, node: ResultNode, collector: List[TestResultNode]):
+        """Recursively collect all test nodes."""
+        if isinstance(node, TestResultNode):
+            collector.append(node)
+        else:
+            for child in node.children:
+                self._collect_tests(child, collector)
+
+    def get_failed_tests(self) -> List[TestResultNode]:
+        """Get all failed test nodes."""
+        return [test for test in self.get_all_test_results() if not test.passed]
+
+    def get_passed_tests(self) -> List[TestResultNode]:
+        """Get all passed test nodes."""
+        return [test for test in self.get_all_test_results() if test.passed]
+
+    def to_dict(self) -> dict:
+        """Convert entire result tree to dictionary."""
+        return {
+            "submission_id": self.submission_id,
+            "template_name": self.template_name,
+            "final_score": round(self.root.score, 2),
+            "tree": self.root.to_dict(),
+            "metadata": self.metadata,
+            "summary": {
+                "total_tests": len(self.get_all_test_results()),
+                "passed_tests": len(self.get_passed_tests()),
+                "failed_tests": len(self.get_failed_tests())
+            }
+        }
+
+    def print_tree(self, show_details: bool = True):
+        """
+        Print a visual representation of the result tree.
+
+        Args:
+            show_details: If True, show test parameters and error messages
+        """
+        print("\n" + "=" * 70)
+        print("🎯 RESULT TREE")
+        print("=" * 70)
+
+        # Print header info
+        if self.submission_id:
+            print(f"📝 Submission: {self.submission_id}")
+        if self.template_name:
+            print(f"📋 Template: {self.template_name}")
+
+        print(f"🏆 Final Score: {self.root.score:.2f}/100")
+
+        summary = {
+            "total": len(self.get_all_test_results()),
+            "passed": len(self.get_passed_tests()),
+            "failed": len(self.get_failed_tests())
+        }
+        print(f"📊 Tests: {summary['total']} total | "
+              f"✅ {summary['passed']} passed | "
+              f"❌ {summary['failed']} failed")
+
+        print("\n" + "-" * 70)
+
+        # Print tree structure
+        self._print_node(self.root, "", show_details)
+
+        print("=" * 70 + "\n")
+
+    def _print_node(self, node: ResultNode, prefix: str, show_details: bool):
+        """Recursively print a node and its children."""
+        if isinstance(node, TestResultNode):
+            self._print_test_node(node, prefix, show_details)
+        else:
+            self._print_parent_node(node, prefix, show_details)
+
+    def _print_parent_node(self, node: ResultNode, prefix: str, show_details: bool):
+        """Print a category or subject node."""
+        # Choose icon based on node type
+        if node.node_type == NodeType.CATEGORY:
+            icon = "📁"
+            name = node.name.upper()
+        else:  # SUBJECT
+            icon = "📘"
+            name = node.name
+
+        # Color code score
+        score_str = f"{node.score:.1f}"
+        if node.score >= 80:
+            score_color = "🟢"
+        elif node.score >= 60:
+            score_color = "🟡"
+        else:
+            score_color = "🔴"
+
+        print(f"{prefix}{icon} {name} "
+              f"[weight: {node.weight:.0f}%] "
+              f"{score_color} {score_str}/100")
+
+        # Print children
+        for child in node.children:
+            self._print_node(child, prefix + "    ", show_details)
+
+    def _print_test_node(self, node: TestResultNode, prefix: str, show_details: bool):
+        """Print a test result node."""
+        # Status icon
+        status = "✅" if node.passed else "❌"
+
+        # Score with color
+        if node.score >= 80:
+            score_color = "🟢"
+        elif node.score >= 60:
+            score_color = "🟡"
+        else:
+            score_color = "🔴"
+
+        # Basic test info
+        test_info = f"{prefix}🧪 {node.test_name} {status}"
+
+        # Add file target if present
+        if node.file_target:
+            test_info += f" [file: {node.file_target}]"
+
+        # Add score
+        test_info += f" {score_color} {node.score:.1f}/100"
+
+        print(test_info)
+
+        # Show details if requested
+        if show_details:
+            # Show parameters
+            if node.test_params:
+                params_str = ", ".join(str(p) for p in node.test_params)
+                print(f"{prefix}    ⚙️  params: ({params_str})")
+
+            # Show error message if failed
+            if node.error_message:
+                print(f"{prefix}    ⚠️  error: {node.error_message}")
+
+            # Show metadata report/message
+            if 'report' in node.metadata:
+                report = node.metadata['report']
+                # Truncate long reports
+                if len(report) > 80:
+                    report = report[:77] + "..."
+                print(f"{prefix}    💬 {report}")
+
+    def print_summary(self):
+        """Print a compact summary of the results."""
+        print("\n" + "=" * 70)
+        print("📊 GRADING SUMMARY")
+        print("=" * 70)
+
+        if self.submission_id:
+            print(f"Submission: {self.submission_id}")
+
+        print(f"\n🏆 Final Score: {self.root.score:.2f}/100")
+
+        # Test statistics
+        all_tests = self.get_all_test_results()
+        passed = self.get_passed_tests()
+        failed = self.get_failed_tests()
+
+        print(f"\n📈 Test Results:")
+        print(f"   Total:  {len(all_tests)}")
+        print(f"   ✅ Passed: {len(passed)} ({len(passed)/len(all_tests)*100:.1f}%)")
+        print(f"   ❌ Failed: {len(failed)} ({len(failed)/len(all_tests)*100:.1f}%)")
+
+        # Score distribution
+        if all_tests:
+            avg_score = sum(t.score for t in all_tests) / len(all_tests)
+            print(f"\n📊 Average Test Score: {avg_score:.2f}")
+
+        # Show failed tests if any
+        if failed:
+            print(f"\n❌ Failed Tests:")
+            for test in failed:
+                print(f"   • {test.test_name}: {test.score:.1f}/100")
+                if test.error_message:
+                    print(f"     Error: {test.error_message}")
+
+        print("=" * 70 + "\n")
+
+
diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py
index f4d5282..6c97885 100644
--- a/autograder/services/criteria_tree_service.py
+++ b/autograder/services/criteria_tree_service.py
@@ -1,271 +1,320 @@
-from typing import List, Dict, Any
+"""
+Refactored CriteriaTreeService - builds criteria trees with embedded test functions.
+
+This service is responsible for:
+- Building CriteriaTree from validated CriteriaConfig
+- Matching and embedding test functions from templates during tree building
+- Validating that all tests exist in the template
+- Balancing weights across sibling nodes
+"""
+import logging
+from typing import List, Dict, Any, Optional
+
+from autograder.models.criteria_tree import CriteriaTree, CategoryNode, SubjectNode, TestNode
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.criteria_config import (
+    CriteriaConfig,
+    CategoryConfig,
+    SubjectConfig,
+    TestConfig
+)
 
-from autograder.builder.models.criteria_tree import Criteria, Subject, Test, TestCall, TestResult
-from autograder.builder.models.template import Template
-from autograder.context import request_context
 
 class CriteriaTreeService:
-    """A factory for creating a Criteria object from a configuration dictionary."""
-    @staticmethod
-    def build_pre_executed_tree(template: Template) -> Criteria:
-        """ Builds a Criteria tree and pre-executes all tests, having leaves as TestResult objects."""
-
-        request = request_context.get_request()
-        config_dict = request.assignment_config.criteria
-        submission_files = request.submission_files
-        criteria = Criteria()
-
-        for category_name in ["base", "bonus", "penalty"]:
-            if category_name in config_dict:
-                category = getattr(criteria, category_name)
-                category_data = config_dict[category_name]
-
-                if "weight" in category_data:
-                    category.max_score = category_data.get("weight", 100)
-
-                # Validate that category doesn't have both subjects and tests
-                if "subjects" in category_data and "tests" in category_data:
-                    raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.")
-
-                if "subjects" in category_data:
-                    subjects = [
-                        CriteriaTree._parse_and_execute_subject(s_name, s_data, template, submission_files)
-                        for s_name, s_data in category_data["subjects"].items()
-                    ]
-                    CriteriaTree._balance_subject_weights(subjects)
-                    for subject in subjects:
-                        category.add_subject(subject)
-                elif "tests" in category_data:
-                    # Handle tests directly at category level
-                    parsed_tests = CriteriaTree._parse_tests(category_data["tests"])
-                    executed_tests = []
-                    for test in parsed_tests:
-                        test_results = test.get_result(template, submission_files, category_name)
-                        executed_tests.extend(test_results)
-                    category.tests = executed_tests
-        return criteria
-
-    @staticmethod
-    def build_non_executed_tree() -> Criteria:
-        """Builds the entire criteria tree, including balancing subject weights."""
-        criteria = Criteria()
-        request = request_context.get_request()
-        config_dict = request.assignment_config.criteria
-        for category_name in ["base", "bonus", "penalty"]:
-            if category_name in config_dict:
-                category = getattr(criteria, category_name)
-                category_data = config_dict[category_name]
-
-                # Set max_score for bonus and penalty categories
-                if "weight" in category_data:
-                    category.max_score = category_data.get("weight", 100)
-
-                # Validate that category doesn't have both subjects and tests
-                if "subjects" in category_data and "tests" in category_data:
-                    raise ValueError(f"Config error: Category '{category_name}' cannot have both 'tests' and 'subjects'.")
-
-                if "subjects" in category_data:
-                    subjects = [
-                        CriteriaTree._parse_subject(s_name, s_data)
-                        for s_name, s_data in category_data["subjects"].items()
-                    ]
-                    CriteriaTree._balance_subject_weights(subjects)
-                    for subject in subjects:
-                        category.add_subject(subject)
-                elif "tests" in category_data:
-                    # Handle tests directly at category level
-                    category.tests = CriteriaTree._parse_tests(category_data["tests"])
-        return criteria
-
-    @staticmethod
-    def _balance_subject_weights(subjects: List[Subject]):
-        """Balances the weights of a list of sibling subjects to sum to 100."""
-        total_weight = sum(s.weight for s in subjects)
-        if total_weight > 0 and total_weight != 100:
-            scaling_factor = 100 / total_weight
-            for subject in subjects:
-                subject.weight *= scaling_factor
-
-    @staticmethod
-    def _parse_subject(subject_name: str, subject_data: dict) -> Subject:
-        """Recursively parses a subject and balances the weights of its children."""
-        if "tests" in subject_data and "subjects" in subject_data:
-            raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.")
-
-        subject = Subject(subject_name, subject_data.get("weight", 0))
-        if "tests" in subject_data:
-            subject.tests = CriteriaTree._parse_tests(subject_data["tests"])
-        elif "subjects" in subject_data:
-            child_subjects = [
-                CriteriaTree._parse_subject(sub_name, sub_data)
-                for sub_name, sub_data in subject_data["subjects"].items()
-            ]
-            CriteriaTree._balance_subject_weights(child_subjects)
-            subject.subjects = {s.name: s for s in child_subjects}
-        else:
-            subject.tests = []
-        return subject
+    """
+    Service for building criteria trees from validated configuration.
+
+    The tree building process now:
+    1. Validates criteria config using Pydantic models
+    2. Matches test functions from template during building
+    3. Embeds test functions and parameters directly in TestNodes
+    4. Balances weights across siblings
+
+    This eliminates the need for pre-executed trees and improves error handling.
+    """
+
+    def __init__(self):
+        self.logger = logging.getLogger("CriteriaTreeService")
+
+    def build_tree(
+        self,
+        criteria_config: CriteriaConfig,
+        template: Template
+    ) -> CriteriaTree:
+        """
+        Build a complete criteria tree from validated configuration.
+
+        Args:
+            criteria_config: Validated criteria configuration
+            template: Template containing test functions
+
+        Returns:
+            Complete CriteriaTree with embedded test functions
+
+        Raises:
+            ValueError: If test function not found in template
+        """
+        self.logger.info("Building criteria tree")
+
+        tree = CriteriaTree()
+
+        # Build base category (required)
+        tree.base = self._build_category(
+            "base",
+            criteria_config.base,
+            template
+        )
+
+        # Build bonus category (optional)
+        if criteria_config.bonus:
+            tree.bonus = self._build_category(
+                "bonus",
+                criteria_config.bonus,
+                template
+            )
+
+        # Build penalty category (optional)
+        if criteria_config.penalty:
+            tree.penalty = self._build_category(
+                "penalty",
+                criteria_config.penalty,
+                template
+            )
+
+        self.logger.info("Criteria tree built successfully")
+        return tree
+
+    def _build_category(
+        self,
+        category_name: str,
+        category_config: CategoryConfig,
+        template: Template
+    ) -> CategoryNode:
+        """Build a category node from configuration."""
+        self.logger.debug(f"Building category: {category_name}")
+
+        category = CategoryNode(name=category_name, weight=category_config.weight)
+
+        # Category can have either subjects or tests
+        if category_config.subjects:
+            subjects = []
+            # Subjects are now an array with subject_name field
+            for subject_config in category_config.subjects:
+                subject = self._build_subject(
+                    subject_config.subject_name,
+                    subject_config,
+                    template,
+                    category_name
+                )
+                subjects.append(subject)
+
+            # Balance subject weights
+            self._balance_weights(subjects)
+            category.subjects = subjects
+
+        elif category_config.tests:
+            tests = self._build_tests(
+                category_config.tests,
+                template,
+                category_name,
+                category_name  # Use category as subject name
+            )
+            category.tests = tests
+
+        return category
+
+    def _build_subject(
+        self,
+        subject_name: str,
+        subject_config: SubjectConfig,
+        template: Template,
+        category_name: str
+    ) -> SubjectNode:
+        """Recursively build a subject node from configuration."""
+        self.logger.debug(f"Building subject: {subject_name}")
+
+        subject = SubjectNode(name=subject_name, weight=subject_config.weight)
+
+        # Subject can have either nested subjects or tests
+        if subject_config.subjects:
+            child_subjects = []
+            # Subjects are now an array with subject_name field
+            for child_config in subject_config.subjects:
+                child = self._build_subject(
+                    child_config.subject_name,
+                    child_config,
+                    template,
+                    category_name
+                )
+                child_subjects.append(child)
+
+            # Balance child weights
+            self._balance_weights(child_subjects)
+            subject.subjects = child_subjects
+
+        elif subject_config.tests:
+            tests = self._build_tests(
+                subject_config.tests,
+                template,
+                category_name,
+                subject_name
+            )
+            subject.tests = tests
 
-    @staticmethod
-    def _parse_and_execute_subject(subject_name: str, subject_data: dict, template: Template, submission_files: dict) -> Subject:
-        """Recursively parses a subject, executes its tests, and balances the weights of its children."""
-        if "tests" in subject_data and "subjects" in subject_data:
-            raise ValueError(f"Config error: Subject '{subject_name}' cannot have both 'tests' and 'subjects'.")
-
-        subject = Subject(subject_name, subject_data.get("weight", 0))
-
-        if "tests" in subject_data:
-            parsed_tests = CriteriaTree._parse_tests(subject_data["tests"])
-            executed_tests = []
-            for test in parsed_tests:
-                # The run method executes the test and returns a list of TestResult objects
-                test_results = test.get_result(template, submission_files, subject_name)
-                executed_tests.extend(test_results)
-            subject.tests = executed_tests  # Store TestResult objects instead of Test objects
-        elif "subjects" in subject_data:
-            child_subjects = [
-                CriteriaTree._parse_and_execute_subject(sub_name, sub_data, template, submission_files)
-                for sub_name, sub_data in subject_data["subjects"].items()
-            ]
-            CriteriaTree._balance_subject_weights(child_subjects)
-            subject.subjects = {s.name: s for s in child_subjects}
-        else:
-            subject.tests = []
         return subject
 
-    @staticmethod
-    def _parse_tests(test_data: list) -> List[Test]:
-        """Parses a list of test definitions from the configuration."""
-        parsed_tests = []
-        for test_item in test_data:
-            if isinstance(test_item, str):
-                # Handle simple test names (e.g., "check_no_unclosed_tags")
-                test = Test(name=test_item)  # Default file
-                test.add_call(TestCall(args=[]))
-                parsed_tests.append(test)
-
-            elif isinstance(test_item, dict):
-                # Handle complex test definitions
-                test_name = test_item.get("name")
-                test_file = test_item.get("file")
-                if not test_name:
-                    raise ValueError(f"Test definition is missing 'name': {test_item}")
-
-                test = Test(name=test_name, filename=test_file)
-
-                if "calls" in test_item:
-                    for call_args in test_item["calls"]:
-                        test.add_call(TestCall(args=call_args))
-                else:
-                    # If no 'calls' are specified, it's a single call with no arguments
-                    test.add_call(TestCall(args=[]))
-
-                parsed_tests.append(test)
-
-        return parsed_tests
-
-
-
-if __name__ == "__main__":
-    criteria_json = {
-  "test_library": "essay ai grader",
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "foundations": {
-        "weight": 60,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "thesis_statement"
-          },
-          {
-            "file": "essay.txt",
-            "name": "clarity_and_cohesion"
-          },
-          {
-            "file": "essay.txt",
-            "name": "grammar_and_spelling"
-          }
-        ]
-      },
-      "prompt_adherence": {
-        "weight": 40,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "adherence_to_prompt",
-            "calls": [
-              [ "Analyze the primary causes of the Industrial Revolution and its impact on 19th-century society." ]
-            ]
-          }
-        ]
-      }
-    }
-  },
-  "bonus": {
-    "weight": 30,
-    "subjects": {
-      "rhetorical_skill": {
-        "weight": 70,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "counterargument_handling"
-          },
-          {
-            "file": "essay.txt",
-            "name": "vocabulary_and_diction"
-          },
-          {
-            "file": "essay.txt",
-            "name": "sentence_structure_variety"
-          }
-        ]
-      },
-      "deeper_analysis": {
-        "weight": 30,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "topic_connection",
-            "calls": [
-              [ "technological innovation", "social inequality" ]
-            ]
-          }
-        ]
-      }
-    }
-  },
-  "penalty": {
-    "weight": 25,
-    "subjects": {
-      "logical_integrity": {
-        "weight": 100,
-        "tests": [
-          {
-            "file": "essay.txt",
-            "name": "logical_fallacy_check"
-          },
-          {
-            "file": "essay.txt",
-            "name": "bias_detection"
-          },
-          {
-              "file": "essay.txt",
-              "name": "originality_and_plagiarism"
-          }
-        ]
-      }
-    }
-  }
-}
-    submission_files = {"essay.txt": """Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike.
-The primary benefit of AI in the workplace is its capacity to augment human potential and drive efficiency. AI-powered systems can analyze vast datasets in seconds, automating routine cognitive and manual tasks, which frees human workers to focus on more complex, creative, and strategic endeavors. For instance, in medicine, AI algorithms assist radiologists in detecting tumors with greater accuracy, while in finance, they identify fraudulent transactions far more effectively than any human team. This collaboration between human and machine not only boosts output but also creates new roles centered around AI development, ethics, and system maintenance—jobs that did not exist a decade ago.
-However, this technological advancement casts a significant shadow of disruption. The same automation that drives efficiency also leads to job displacement, particularly for roles characterized by repetitive tasks. Assembly line workers, data entry clerks, and even some paralegal roles face a high risk of obsolescence. This creates a widening skills gap, where demand for high-level technical skills soars while demand for traditional skills plummets. Without robust mechanisms for reskilling and upskilling the existing workforce, this gap threatens to exacerbate socio-economic inequality, creating a divide between those who can command AI and those who are displaced by it. There are many gramatical errors in this sentence, for testing purposes.
-The most critical challenge, therefore, is not to halt technological progress but to manage its societal impact. A multi-pronged strategy is essential. Governments and educational institutions must collaborate to reform curricula, emphasizing critical thinking, digital literacy, and lifelong learning. Furthermore, corporations have a responsibility to invest in their employees through continuous training programs. Finally, strengthening social safety nets, perhaps through concepts like Universal Basic Income (UBI) or enhanced unemployment benefits, may be necessary to support individuals as they navigate this volatile transition period.
-In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""}
-    #tree = CriteriaTree.build_pre_executed_tree(criteria_json, WebDevLibrary(), submission_files)
-    tree = CriteriaTree.build_non_executed_tree(criteria_json)
-    #tree.print_pre_executed_tree()
-    tree.print_tree()
\ No newline at end of file
+    def _build_tests(
+        self,
+        test_configs: List[TestConfig],
+        template: Template,
+        category_name: str,
+        subject_name: str
+    ) -> List[TestNode]:
+        """
+        Build test nodes from configuration with embedded test functions.
+
+        New schema: Each test has named parameters directly (no 'calls' array).
+        Creates one TestNode per test configuration.
+        """
+        test_nodes = []
+
+        for test_index, test_config in enumerate(test_configs):
+            # Find matching test function in template
+            test_function = self._find_test_function(test_config.name, template)
+
+            if not test_function:
+                available_tests = "unknown"
+                if hasattr(template, 'get_available_tests'):
+                    try:
+                        available_tests = ', '.join(template.get_available_tests())
+                    except:
+                        pass
+                error_msg = (
+                    f"Test function '{test_config.name}' not found in template. "
+                    f"Available tests: {available_tests}"
+                )
+                self.logger.error(error_msg)
+                raise ValueError(error_msg)
+
+            # Convert named parameters to args list
+            params = test_config.get_args_list() if test_config.parameters else []
+
+            # Create single test node for this test configuration
+            test_node = TestNode(
+                name=f"{test_config.name}_{test_index}",
+                test_name=test_config.name,
+                test_function=test_function,
+                parameters=params,
+                file_target=test_config.file,
+                category_name=category_name,
+                subject_name=subject_name,
+                weight=100.0  # Will be balanced with siblings
+            )
+            test_nodes.append(test_node)
+
+            self.logger.debug(
+                f"Created test node: {test_node.name} with params {params}"
+            )
+
+        # Balance weights across all test nodes at this level
+        if test_nodes:
+            self._balance_weights(test_nodes)
+
+        return test_nodes
+
+    def _find_test_function(
+        self,
+        test_name: str,
+        template: Template
+    ) -> Optional[TestFunction]:
+        """
+        Find a test function by name in the template.
+
+        Args:
+            test_name: Name of the test function to find
+            template: Template to search in
+
+        Returns:
+            TestFunction if found, None otherwise
+        """
+        try:
+            return template.get_test(test_name)
+        except (AttributeError, KeyError):
+            return None
+
+    def _balance_weights(self, nodes: List) -> None:
+        """
+        Balance weights of sibling nodes to sum to 100.
+
+        Args:
+            nodes: List of sibling nodes (SubjectNode or TestNode)
+        """
+        if not nodes:
+            return
+
+        total_weight = sum(node.weight for node in nodes)
+
+        if total_weight == 0:
+            # If all weights are 0, distribute equally
+            equal_weight = 100.0 / len(nodes)
+            for node in nodes:
+                node.weight = equal_weight
+            self.logger.debug(f"Distributed equal weights: {equal_weight} each")
+        elif total_weight != 100:
+            # Scale weights to sum to 100
+            scale_factor = 100.0 / total_weight
+            for node in nodes:
+                node.weight *= scale_factor
+            self.logger.debug(f"Balanced weights with scale factor: {scale_factor}")
+
+
+class CriteriaTreeBuilder:
+    """
+    Convenience builder class for creating criteria trees.
+
+    Usage:
+        builder = CriteriaTreeBuilder()
+        tree = (builder
+            .from_dict(criteria_dict)
+            .with_template(template)
+            .build())
+    """
+
+    def __init__(self):
+        self._config: Optional[CriteriaConfig] = None
+        self._template: Optional[Template] = None
+        self._service = CriteriaTreeService()
+
+    def from_dict(self, criteria_dict: dict) -> 'CriteriaTreeBuilder':
+        """Load and validate criteria from dictionary."""
+        self._config = CriteriaConfig.from_dict(criteria_dict)
+        return self
+
+    def from_json(self, criteria_json: str) -> 'CriteriaTreeBuilder':
+        """Load and validate criteria from JSON string."""
+        self._config = CriteriaConfig.from_json(criteria_json)
+        return self
+
+    def with_config(self, config: CriteriaConfig) -> 'CriteriaTreeBuilder':
+        """Use an already validated CriteriaConfig."""
+        self._config = config
+        return self
+
+    def with_template(self, template: Template) -> 'CriteriaTreeBuilder':
+        """Set the template to use."""
+        self._template = template
+        return self
+
+    def build(self) -> CriteriaTree:
+        """Build the criteria tree."""
+        if not self._config:
+            raise ValueError("Criteria configuration not set. Use from_dict() or from_json()")
+        if not self._template:
+            raise ValueError("Template not set. Use with_template()")
+
+        return self._service.build_tree(
+            self._config,
+            self._template
+        )
+
diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py
index c1be7a2..a2d28ab 100644
--- a/autograder/services/grader_service.py
+++ b/autograder/services/grader_service.py
@@ -1,208 +1,414 @@
-from typing import Optional
+"""
+Enhanced GraderService - can build result trees from CriteriaTree or raw config.
 
-from autograder.services.criteria_tree_service import *
-from autograder.models.dataclass.result import Result
-from autograder.models.dataclass.test_result import TestResult
+This service handles two grading flows:
+1. Single submission: Build result tree directly from criteria config (one-pass)
+2. Multiple submissions: Build result tree from pre-built criteria tree (reusable)
+"""
+import logging
+from typing import Dict, Any, Optional, List
+
+from autograder.models.criteria_tree import CriteriaTree, CategoryNode, SubjectNode, TestNode
+from autograder.models.result_tree import ResultTree, ResultNode, TestResultNode, NodeType
+from autograder.models.abstract.template import Template
+from autograder.models.dataclass.criteria_config import CriteriaConfig
+from autograder.services.criteria_tree_service import CriteriaTreeService
 
 
 class GraderService:
     """
-    Traverses a Criteria tree, executes tests, and calculates a weighted score.
-    Only includes scores from categories (base, bonus, penalty) that contain tests.
+    Service for executing grading and building result trees.
+
+    Supports two modes:
+    1. Direct grading: Build result tree from criteria config (single submission)
+    2. Tree-based grading: Build result tree from criteria tree (multiple submissions)
     """
 
-    def __init__(self, criteria_tree: 'Criteria', test_library: object):
-        self.criteria = criteria_tree
-        self.test_library = test_library
-        self.base_results: List['TestResult'] = []
-        self.bonus_results: List['TestResult'] = []
-        self.penalty_results: List['TestResult'] = []
-
-    def run(self) -> 'Result':
-        request = request_context.get_request()
-        submission_files = request.submission_files
-        author_name = request.student_name
-        final_score = self._run(submission_files)
-        return Result(
-            final_score=final_score,
-            author=author_name,
-            submission_files=submission_files,
-            base_results=self.base_results,
-            bonus_results=self.bonus_results,
-            penalty_results=self.penalty_results
-        )
+    def __init__(self):
+        self.logger = logging.getLogger("GraderService")
+        self._criteria_service = CriteriaTreeService()
+
+    def grade_from_config(
+        self,
+        criteria_config: CriteriaConfig,
+        template: Template,
+        submission_files: Dict[str, Any],
+        submission_id: Optional[str] = None
+    ) -> ResultTree:
+        """
+        Grade a submission directly from criteria configuration (one-pass).
 
+        This is optimized for single submissions - builds and executes in one traversal.
 
-    def _run(self, submission_files: Dict) -> float:
-        """
-        Runs the entire grading process and returns the final calculated score.
-        """
-        print("\n--- STARTING GRADING PROCESS ---")
-        # Step 1: Grade categories. The methods will return None if no tests exist.
-        ## CHANGED: Coalesce None to 0.0 to signify that an empty category contributes nothing to the score.
-        base_score = self._grade_subject_or_category(self.criteria.base, submission_files, self.base_results) or 0.0
-        bonus_score = self._grade_subject_or_category(self.criteria.bonus, submission_files, self.bonus_results) or 0.0
-        penalty_points = self._calculate_penalty_points(self.criteria.penalty, submission_files,
-                                                        self.penalty_results) or 0.0
-
-        # Step 3: Apply the final scoring logic
-        final_score = self._calculate_final_score(base_score, bonus_score, penalty_points)
-
-        print("\n--- GRADING COMPLETE ---")
-        print(f"Aggregated Base Score: {base_score:.2f}")
-        print(f"Aggregated Bonus Score: {bonus_score:.2f}")
-        print(f"Total Penalty Points to Subtract: {penalty_points:.2f}")
-        print("-" * 25)
-        print(f"Final Calculated Score: {final_score:.2f}")
-        print("-" * 25)
-
-        return final_score
-
-    def _grade_subject_or_category(self, current_node: 'Subject' or 'TestCategory', submission_files: Dict,
-                                   results_list: List['TestResult'], depth=0) -> Optional[float]:
-        """
-        Recursively grades a subject or category, returning a weighted score or None if no tests are found.
+        Args:
+            criteria_config: Validated criteria configuration
+            template: Template with test functions
+            submission_files: Student submission files
+            submission_id: Optional identifier for the submission
+
+        Returns:
+            Complete ResultTree with all tests executed
         """
-        prefix = "    " * depth
-
-        # Base case: Node is a leaf with tests
-        if hasattr(current_node, 'tests') and current_node.tests:
-            print(f"\n{prefix}📘 Grading {current_node.name}...")
-            subject_test_results = []
-            for test in current_node.tests:
-                test_results = test.get_result(self.test_library, submission_files, current_node.name)
-                subject_test_results.extend(test_results)
-
-            if not subject_test_results:
-                return None  # No tests were actually run
-
-            results_list.extend(subject_test_results)
-            scores = [res.score for res in subject_test_results]
-            average_score = sum(scores) / len(scores)
-            print(f"{prefix}  -> Average score: {average_score:.2f}")
-            return average_score
-
-        # Recursive case: Node is a branch (category or subject with sub-subjects)
-        child_subjects_classes = getattr(current_node, 'subjects', {})
-        if not child_subjects_classes:
-            return None  # No tests and no children means this branch is empty
-        child_subjects = child_subjects_classes.values()
-        if not child_subjects:
-            return None
-        print(f"\n{prefix}📘 Grading {current_node.name}...")
-
-        child_scores_map = {sub.name: self._grade_subject_or_category(sub, submission_files, results_list, depth + 1)
-                            for sub in child_subjects}
-
-        # Filter out children that had no tests (returned None)
-        valid_children = [sub for sub in child_subjects if child_scores_map[sub.name] is not None]
-
-        if not valid_children:
-            return None  # No children in this branch contained any tests
-
-        total_weight = sum(sub.weight for sub in valid_children)
-
-        # If weights are 0, calculate a simple average of the valid children
-        if total_weight == 0:
-            scores = [child_scores_map[sub.name] for sub in valid_children]
-            return sum(scores) / len(scores)
+        self.logger.info(f"Grading from config for submission: {submission_id}")
 
-        # Otherwise, calculate the weighted score based only on valid children
-        weighted_score = 0
-        for sub in valid_children:
-            child_score = child_scores_map[sub.name]
-            weighted_score += child_score * (sub.weight / total_weight)
+        # Build root result node
+        root = ResultNode(
+            name="root",
+            node_type=NodeType.CATEGORY,
+            weight=100.0
+        )
 
-        print(f"\n{prefix}  -> Weighted score for '{current_node.name}': {weighted_score:.2f}")
-        return weighted_score
+        # Build and execute base category (required)
+        base_result = self._build_and_execute_category(
+            "base",
+            criteria_config.base,
+            template,
+            submission_files
+        )
+        root.children.append(base_result)
+
+        # Build and execute bonus category (optional)
+        if criteria_config.bonus:
+            bonus_result = self._build_and_execute_category(
+                "bonus",
+                criteria_config.bonus,
+                template,
+                submission_files
+            )
+            root.children.append(bonus_result)
+
+        # Build and execute penalty category (optional)
+        if criteria_config.penalty:
+            penalty_result = self._build_and_execute_category(
+                "penalty",
+                criteria_config.penalty,
+                template,
+                submission_files
+            )
+            root.children.append(penalty_result)
+
+        # Create result tree and calculate scores
+        result_tree = ResultTree(
+            root=root,
+            submission_id=submission_id,
+            template_name=template.name if hasattr(template, 'name') else None
+        )
 
-    def _calculate_penalty_points(self, penalty_category: 'TestCategory', submission_files: Dict,
-                                  results_list: List['TestResult']) -> Optional[float]:
-        """
-        Calculates the total penalty points. Returns None if no penalty tests exist.
-        """
-        print(f"\n Penalizing {penalty_category.name}...")
+        # Handle AI executor batch if needed
+        if hasattr(template, 'execution_helper') and template.execution_helper:
+            self.logger.info("Executing AI batch requests")
+            template.execution_helper.stop()
 
-        # This is a simplified entry point; the main logic is in _calculate_subject_penalty
-        # We treat the main penalty category like a subject to start the recursion.
-        return self._calculate_subject_penalty(penalty_category, submission_files, results_list, depth=0)
+        # Calculate final scores
+        final_score = result_tree.calculate_final_score()
+        self.logger.info(f"Grading complete. Final score: {final_score}")
 
-    def _calculate_subject_penalty(self, subject: 'Subject', submission_files: Dict, results_list: List['TestResult'],
-                                   depth=0) -> Optional[float]:
-        """
-        Helper to calculate penalty for a single subject or category.
-        Returns penalty points (0-100) or None if no tests are found.
+        return result_tree
+
+    def grade_from_tree(
+        self,
+        criteria_tree: CriteriaTree,
+        submission_files: Dict[str, Any],
+        submission_id: Optional[str] = None
+    ) -> ResultTree:
         """
-        prefix = "    " * depth
-
-        # Base Case: This node is a leaf with tests
-        if hasattr(subject, 'tests') and subject.tests:
-            test_penalties = []
-            for test in subject.tests:
-                test_results = test.get_result(self.test_library, submission_files, subject.name)
-                if not test_results:
-                    continue
-                results_list.extend(test_results)
-                # Penalty incurred = 100 - score
-                penalty_incurred = (100 - sum(res.score for res in test_results) / len(test_results))
-                test_penalties.append(penalty_incurred)
-
-            if not test_penalties:
-                return None  # No tests were actually run
-
-            avg_penalty_for_subject = sum(test_penalties) / len(test_penalties)
-            print(f"{prefix}  -> Average penalty for '{subject.name}': {avg_penalty_for_subject:.2f}")
-            return avg_penalty_for_subject
-
-        # Recursive Case: This node is a branch with children
-        child_subjects_classes = getattr(subject, 'subjects', {})
-        if not child_subjects_classes:
-            return None  # No tests and no children
-        child_subjects = child_subjects_classes.values()
-        child_penalties_map = {sub.name: self._calculate_subject_penalty(sub, submission_files, results_list, depth + 1)
-                               for sub in child_subjects}
-
-        valid_children = [sub for sub in child_subjects if child_penalties_map[sub.name] is not None]
-
-        if not valid_children:
-            return None  # No children had penalty tests
-
-        total_weight = sum(sub.weight for sub in valid_children)
-        if total_weight == 0:
-            penalties = [child_penalties_map[sub.name] for sub in valid_children]
-            return sum(penalties) / len(penalties)  # Average of valid penalties
+        Grade a submission using a pre-built criteria tree.
 
-        weighted_penalty = 0
-        for sub in valid_children:
-            child_penalty = child_penalties_map[sub.name]
-            weighted_penalty += child_penalty * (sub.weight / total_weight)
+        This is optimized for multiple submissions - reuses the same criteria tree.
 
-        print(f"\n{prefix}  -> Weighted penalty for '{subject.name}': {weighted_penalty:.2f}")
-        return weighted_penalty
+        Args:
+            criteria_tree: Pre-built criteria tree with test functions
+            submission_files: Student submission files
+            submission_id: Optional identifier for the submission
 
-    def _calculate_final_score(self, base_score: float, bonus_score: float, penalty_points: float) -> float:
+        Returns:
+            Complete ResultTree with all tests executed
         """
-        Applies the final scoring logic with the corrected penalty calculation.
-        """
-        bonus_weight = self.criteria.bonus.max_score
-        penalty_weight = self.criteria.penalty.max_score
+        self.logger.info(f"Grading from tree for submission: {submission_id}")
+
+        # Build root result node
+        root = ResultNode(
+            name="root",
+            node_type=NodeType.CATEGORY,
+            weight=100.0
+        )
+
+        # Execute base category
+        if criteria_tree.base:
+            base_result = self._execute_category(
+                criteria_tree.base,
+                submission_files
+            )
+            root.children.append(base_result)
+
+        # Execute bonus category
+        if criteria_tree.bonus:
+            bonus_result = self._execute_category(
+                criteria_tree.bonus,
+                submission_files
+            )
+            root.children.append(bonus_result)
+
+        # Execute penalty category
+        if criteria_tree.penalty:
+            penalty_result = self._execute_category(
+                criteria_tree.penalty,
+                submission_files
+            )
+            root.children.append(penalty_result)
+
+        # Create result tree
+        result_tree = ResultTree(
+            root=root,
+            submission_id=submission_id
+        )
+
+        # Handle AI executor batch if needed
+        # Note: For tree-based grading, the template is embedded in test nodes
+        first_test = self._find_first_test(criteria_tree.base)
+        if first_test and hasattr(first_test, 'test_function'):
+            test_func = first_test.test_function
+            if hasattr(test_func, 'executor') and test_func.executor:
+                self.logger.info("Executing AI batch requests")
+                test_func.executor.stop()
+
+        # Calculate final scores
+        final_score = result_tree.calculate_final_score()
+        self.logger.info(f"Grading complete. Final score: {final_score}")
+
+        return result_tree
+
+    def _build_and_execute_category(
+        self,
+        category_name: str,
+        category_config,
+        template: Template,
+        submission_files: Dict[str, Any]
+    ) -> ResultNode:
+        """Build and execute a category in one pass."""
+        category_result = ResultNode(
+            name=category_name,
+            node_type=NodeType.CATEGORY,
+            weight=category_config.weight
+        )
+
+        # Category has either subjects or tests
+        if category_config.subjects:
+            # Subjects are now an array with subject_name field
+            for subject_config in category_config.subjects:
+                subject_result = self._build_and_execute_subject(
+                    subject_config.subject_name,
+                    subject_config,
+                    template,
+                    submission_files,
+                    category_name
+                )
+                category_result.children.append(subject_result)
+
+            # Balance weights
+            self._balance_weights(category_result.children)
+
+        elif category_config.tests:
+            test_results = self._build_and_execute_tests(
+                category_config.tests,
+                template,
+                submission_files,
+                category_name,
+                category_name
+            )
+            category_result.children.extend(test_results)
+
+        return category_result
+
+    def _build_and_execute_subject(
+        self,
+        subject_name: str,
+        subject_config,
+        template: Template,
+        submission_files: Dict[str, Any],
+        category_name: str
+    ) -> ResultNode:
+        """Recursively build and execute a subject in one pass."""
+        subject_result = ResultNode(
+            name=subject_name,
+            node_type=NodeType.SUBJECT,
+            weight=subject_config.weight
+        )
+
+        # Subject has either nested subjects or tests
+        if subject_config.subjects:
+            # Subjects are now an array with subject_name field
+            for child_config in subject_config.subjects:
+                child_result = self._build_and_execute_subject(
+                    child_config.subject_name,
+                    child_config,
+                    template,
+                    submission_files,
+                    category_name
+                )
+                subject_result.children.append(child_result)
+
+            # Balance weights
+            self._balance_weights(subject_result.children)
+
+        elif subject_config.tests:
+            test_results = self._build_and_execute_tests(
+                subject_config.tests,
+                template,
+                submission_files,
+                category_name,
+                subject_name
+            )
+            subject_result.children.extend(test_results)
+
+        return subject_result
+
+    def _build_and_execute_tests(
+        self,
+        test_configs: List,
+        template: Template,
+        submission_files: Dict[str, Any],
+        category_name: str,
+        subject_name: str
+    ) -> List[TestResultNode]:
+        """Build and execute test nodes."""
+        test_results = []
+
+        for test_index, test_config in enumerate(test_configs):
+            # Find test function
+            test_function = template.get_test(test_config.name)
+
+            if not test_function:
+                raise ValueError(
+                    f"Test '{test_config.name}' not found in template"
+                )
+
+            # Convert named parameters to args list
+            params = test_config.get_args_list() if test_config.parameters else []
+
+            # Create and execute test node
+            test_node = TestResultNode(
+                name=f"{test_config.name}_{test_index}",
+                node_type=NodeType.TEST,
+                weight=100.0,  # Will be balanced
+                test_name=test_config.name,
+                test_function=test_function,
+                test_params=params,
+                file_target=test_config.file
+            )
+
+            # Execute test
+            test_node.execute(submission_files)
+            test_results.append(test_node)
+
+        # Balance weights
+        if test_results:
+            self._balance_weights(test_results)
+
+        return test_results
+
+    def _execute_category(
+        self,
+        category_node: CategoryNode,
+        submission_files: Dict[str, Any]
+    ) -> ResultNode:
+        """Execute a category from criteria tree."""
+        category_result = ResultNode(
+            name=category_node.name,
+            node_type=NodeType.CATEGORY,
+            weight=category_node.weight
+        )
+
+        # Execute subjects
+        if hasattr(category_node, 'subjects') and category_node.subjects:
+            for subject in category_node.subjects:
+                subject_result = self._execute_subject(subject, submission_files)
+                category_result.children.append(subject_result)
+
+        # Execute tests
+        if hasattr(category_node, 'tests') and category_node.tests:
+            for test in category_node.tests:
+                test_result = self._execute_test(test, submission_files)
+                category_result.children.append(test_result)
+
+        return category_result
+
+    def _execute_subject(
+        self,
+        subject_node: SubjectNode,
+        submission_files: Dict[str, Any]
+    ) -> ResultNode:
+        """Execute a subject from criteria tree."""
+        subject_result = ResultNode(
+            name=subject_node.name,
+            node_type=NodeType.SUBJECT,
+            weight=subject_node.weight
+        )
+
+        # Execute nested subjects
+        if hasattr(subject_node, 'subjects') and subject_node.subjects:
+            for child in subject_node.subjects:
+                child_result = self._execute_subject(child, submission_files)
+                subject_result.children.append(child_result)
+
+        # Execute tests
+        if hasattr(subject_node, 'tests') and subject_node.tests:
+            for test in subject_node.tests:
+                test_result = self._execute_test(test, submission_files)
+                subject_result.children.append(test_result)
+
+        return subject_result
+
+    def _execute_test(
+        self,
+        test_node: TestNode,
+        submission_files: Dict[str, Any]
+    ) -> TestResultNode:
+        """Execute a single test from criteria tree."""
+        result_node = TestResultNode(
+            name=test_node.name,
+            node_type=NodeType.TEST,
+            weight=test_node.weight,
+            test_name=test_node.test_name,
+            test_function=test_node.test_function,
+            test_params=test_node.parameters,
+            file_target=test_node.file_target
+        )
 
-        final_score = base_score
+        # Execute the test
+        result_node.execute(submission_files)
 
-        if final_score < 100:
-            bonus_points_earned = (bonus_score / 100) * bonus_weight
-            final_score += bonus_points_earned
+        return result_node
 
-        final_score = min(100.0, final_score)
+    def _balance_weights(self, nodes: List[ResultNode]) -> None:
+        """Balance weights of sibling nodes to sum to 100."""
+        if not nodes:
+            return
 
-        # The penalty_points now represents the percentage of the total penalty to apply
-        penalty_points_to_subtract = (penalty_points / 100) * penalty_weight
-        final_score -= penalty_points_to_subtract
+        total_weight = sum(node.weight for node in nodes)
 
-        print(f"\nApplying Final Calculations:")
-        print(f"  Base Score: {base_score:.2f}")
-        print(f"  Bonus Points Added: {(bonus_score / 100) * bonus_weight:.2f}")
-        print(f"  Score Before Penalty: {min(100.0, final_score + penalty_points_to_subtract):.2f}")
-        print(f"  Penalty Points Subtracted: {penalty_points_to_subtract:.2f}")
+        if total_weight == 0:
+            equal_weight = 100.0 / len(nodes)
+            for node in nodes:
+                node.weight = equal_weight
+        elif total_weight != 100:
+            scale_factor = 100.0 / total_weight
+            for node in nodes:
+                node.weight *= scale_factor
+
+    def _find_first_test(self, node) -> Optional[TestNode]:
+        """Find the first test node in the tree."""
+        if isinstance(node, TestNode):
+            return node
+
+        if hasattr(node, 'tests') and node.tests:
+            return node.tests[0]
+
+        if hasattr(node, 'subjects') and node.subjects:
+            for subject in node.subjects:
+                result = self._find_first_test(subject)
+                if result:
+                    return result
+
+        return None
 
-        return max(0.0, final_score)
\ No newline at end of file
diff --git a/autograder/steps/build_tree_step.py b/autograder/steps/build_tree_step.py
index 7ca274c..6aea989 100644
--- a/autograder/steps/build_tree_step.py
+++ b/autograder/steps/build_tree_step.py
@@ -2,12 +2,59 @@
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.abstract.step import Step
 from autograder.models.abstract.template import Template
+from autograder.models.dataclass.criteria_config import CriteriaConfig
+from autograder.models.dataclass.step_result import StepResult, StepStatus
 
 
 class BuildTreeStep(Step):
+    """
+    Step that builds a CriteriaTree from validated criteria configuration.
+
+    This step is used when grading multiple submissions with the same criteria.
+    The tree is built once and reused for efficiency.
+    """
+
     def __init__(self, criteria_json: dict):
+        """
+        Initialize the build tree step.
+
+        Args:
+            criteria_json: Raw criteria configuration dictionary
+        """
         self._criteria_json = criteria_json
-        self._criteria_tree_service = CriteriaTreeService
+        self._criteria_tree_service = CriteriaTreeService()
+
+    def execute(self, input: Template) -> StepResult[CriteriaTree]:
+        """
+        Build a criteria tree from the configuration and template.
+
+        Args:
+            input: Template containing test functions
+
+        Returns:
+            StepResult containing the built CriteriaTree
+        """
+        try:
+            # Validate criteria configuration
+            criteria_config = CriteriaConfig.from_dict(self._criteria_json)
+
+            # Build the criteria tree with embedded test functions
+            criteria_tree = self._criteria_tree_service.build_tree(
+                criteria_config,
+                input
+            )
+
+            return StepResult(
+                data=criteria_tree,
+                status=StepStatus.SUCCESS,
+                original_input=input
+            )
 
-    def execute(self, input: Template) -> CriteriaTree:
-        pass
\ No newline at end of file
+        except Exception as e:
+            return StepResult(
+                data=None,
+                status=StepStatus.FAIL,
+                error=f"Failed to build criteria tree: {str(e)}",
+                failed_at_step=self.__class__.__name__,
+                original_input=input
+            )
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index b98d76e..396c68b 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -1,16 +1,107 @@
+from typing import Dict, Any, Union
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.dataclass.grading_result import GradingResult
+from autograder.models.dataclass.step_result import StepResult, StepStatus
 from autograder.models.abstract.step import Step
+from autograder.models.abstract.template import Template
+from autograder.models.dataclass.criteria_config import CriteriaConfig
 from autograder.services.grader_service import GraderService
 
 
 class GradeStep(Step):
+    """
+    Step that grades a submission using either a CriteriaTree or raw criteria configuration.
+
+    This step intelligently determines which grading method to use:
+    - If input is CriteriaTree: Use grade_from_tree (for multiple submissions)
+    - If input is Template: Use grade_from_config (for single submission)
+    """
+
+    def __init__(self, criteria_json: dict = None, submission_files: Dict[str, Any] = None, submission_id: str = None):
+        """
+        Initialize the grade step.
+
+        Args:
+            criteria_json: Raw criteria configuration (only needed for single submission mode)
+            submission_files: Student submission files
+            submission_id: Optional identifier for the submission
+        """
+        self._criteria_json = criteria_json
+        self._submission_files = submission_files
+        self._submission_id = submission_id
+        self._grader_service = GraderService()
+
+    def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingResult]:
+        """
+        Grade a submission based on the input type.
+
+        Args:
+            input: Either a CriteriaTree (multi-submission mode) or Template (single submission mode)
+
+        Returns:
+            StepResult containing GradingResult with scores and result tree
+        """
+        try:
+            # Determine which grading method to use based on input type
+            if isinstance(input, CriteriaTree):
+                # Multi-submission mode: grade from pre-built tree
+                result_tree = self._grader_service.grade_from_tree(
+                    criteria_tree=input,
+                    submission_files=self._submission_files,
+                    submission_id=self._submission_id
+                )
+            elif isinstance(input, Template):
+                # Single submission mode: grade directly from config
+                if not self._criteria_json:
+                    raise ValueError("criteria_json is required when grading from template")
+
+                # Validate criteria configuration
+                criteria_config = CriteriaConfig.from_dict(self._criteria_json)
+
+                # Grade directly from config (one-pass)
+                result_tree = self._grader_service.grade_from_config(
+                    criteria_config=criteria_config,
+                    template=input,
+                    submission_files=self._submission_files,
+                    submission_id=self._submission_id
+                )
+            else:
+                raise ValueError(
+                    f"Invalid input type for GradeStep: {type(input).__name__}. "
+                    f"Expected CriteriaTree or Template"
+                )
+
+            # Create grading result
+            final_score = result_tree.calculate_final_score()
+
+            grading_result = GradingResult(
+                final_score=final_score,
+                status="success",
+                result_tree=result_tree
+            )
+
+            return StepResult(
+                data=grading_result,
+                status=StepStatus.SUCCESS,
+                original_input=input
+            )
+
+        except Exception as e:
+            # Return error result
+            grading_result = GradingResult(
+                final_score=0.0,
+                status="error",
+                error=f"Grading failed: {str(e)}",
+                failed_at_step=self.__class__.__name__
+            )
+
+            return StepResult(
+                data=grading_result,
+                status=StepStatus.FAIL,
+                error=str(e),
+                failed_at_step=self.__class__.__name__,
+                original_input=input
+            )
 
-    def __init__(self):
-        self.submission_files = None # Injected at runtime
-        self._grader_service = GraderService() # GraderService here
 
-    def execute(self, input: CriteriaTree) -> GradingResult: # StepResult<GradingResult>
-        """Generate a grading result based on the criteria tree execution over a submission"""
-        pass
 
diff --git a/tests/data/custom_template/custom_template.py b/tests/data/custom_template/custom_template.py
index 56e0f7c..23ce232 100644
--- a/tests/data/custom_template/custom_template.py
+++ b/tests/data/custom_template/custom_template.py
@@ -29,7 +29,7 @@ def parameter_description(self):
     def execute(self, filename: str) -> TestResult:
         request = request_context.get_request()
         submission_files = request.submission_files
-        
+
         if filename in submission_files:
             return TestResult(
                 self.name,
diff --git a/tests/test_pipeline_modes.py b/tests/test_pipeline_modes.py
new file mode 100644
index 0000000..df88317
--- /dev/null
+++ b/tests/test_pipeline_modes.py
@@ -0,0 +1,252 @@
+"""
+Test the pipeline's ability to handle single vs multi-submission modes.
+
+This test verifies:
+1. Single submission mode: Grades directly from config (one-pass)
+2. Multi-submission mode: Builds tree once, grades multiple times
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from autograder.autograder import build_pipeline
+from autograder.models.dataclass.criteria_config import CriteriaConfig
+
+
+def create_simple_criteria():
+    """Create simple test criteria."""
+    return {
+        "base": {
+            "weight": 90,
+            "subjects": [
+                {
+                    "subject_name": "Basic Tests",
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "always_pass",
+                            "parameters": {}
+                        },
+                        {
+                            "name": "check_value",
+                            "parameters": {
+                                "expected": 42
+                            }
+                        }
+                    ]
+                }
+            ]
+        },
+        "bonus": {
+            "weight": 10,
+            "tests": [
+                {
+                    "name": "always_pass",
+                    "parameters": {}
+                }
+            ]
+        }
+    }
+
+
+def create_mock_submission():
+    """Create mock submission files."""
+    return {
+        "main.py": "value = 42\n"
+    }
+
+
+def test_single_submission_mode():
+    """Test single submission mode (grade directly from config)."""
+    print("\n" + "="*80)
+    print("TEST: Single Submission Mode (Direct from Config)")
+    print("="*80)
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Build pipeline for single submission
+    pipeline = build_pipeline(
+        template_name="input_output",
+        include_feedback=False,
+        grading_criteria=criteria,
+        feedback_config=None,
+        setup_config=None,
+        custom_template=None,
+        feedback_mode=None,
+        submission_files=submission,
+        submission_id="test_001",
+        is_multi_submission=False  # Single submission mode
+    )
+
+    # Verify pipeline steps
+    print("\nPipeline Steps:")
+    for i, step in enumerate(pipeline._steps):
+        print(f"  {i+1}. {step.__class__.__name__}")
+
+    print("\nExpected flow:")
+    print("  - TemplateLoaderStep loads the template")
+    print("  - GradeStep grades directly from config (one-pass)")
+    print("  - ExporterStep exports results")
+
+    # Verify GradeStep has criteria_json for single submission mode
+    grade_step = None
+    for step in pipeline._steps:
+        if step.__class__.__name__ == "GradeStep":
+            grade_step = step
+            break
+
+    assert grade_step is not None, "GradeStep not found in pipeline"
+    assert grade_step._criteria_json is not None, "GradeStep should have criteria_json in single mode"
+    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
+
+    print("\n✓ Single submission mode configured correctly")
+    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
+    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
+
+
+def test_multi_submission_mode():
+    """Test multi-submission mode (build tree, then grade)."""
+    print("\n" + "="*80)
+    print("TEST: Multi-Submission Mode (Tree Building)")
+    print("="*80)
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Build pipeline for multiple submissions
+    pipeline = build_pipeline(
+        template_name="input_output",
+        include_feedback=False,
+        grading_criteria=criteria,
+        feedback_config=None,
+        setup_config=None,
+        custom_template=None,
+        feedback_mode=None,
+        submission_files=submission,
+        submission_id="test_002",
+        is_multi_submission=True  # Multi-submission mode
+    )
+
+    # Verify pipeline steps
+    print("\nPipeline Steps:")
+    for i, step in enumerate(pipeline._steps):
+        print(f"  {i+1}. {step.__class__.__name__}")
+
+    print("\nExpected flow:")
+    print("  - TemplateLoaderStep loads the template")
+    print("  - BuildTreeStep builds criteria tree (reusable)")
+    print("  - GradeStep grades from tree")
+    print("  - ExporterStep exports results")
+
+    # Verify BuildTreeStep and GradeStep are present
+    has_build_tree = False
+    grade_step = None
+
+    for step in pipeline._steps:
+        if step.__class__.__name__ == "BuildTreeStep":
+            has_build_tree = True
+        elif step.__class__.__name__ == "GradeStep":
+            grade_step = step
+
+    assert has_build_tree, "BuildTreeStep not found in pipeline for multi-submission mode"
+    assert grade_step is not None, "GradeStep not found in pipeline"
+    assert grade_step._criteria_json is None, "GradeStep should NOT have criteria_json in multi mode"
+    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
+
+    print("\n✓ Multi-submission mode configured correctly")
+    print(f"  - BuildTreeStep present: {has_build_tree}")
+    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
+    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
+
+
+def test_grade_step_input_detection():
+    """Test that GradeStep correctly detects input type."""
+    print("\n" + "="*80)
+    print("TEST: GradeStep Input Type Detection")
+    print("="*80)
+
+    from autograder.steps.grade_step import GradeStep
+    from autograder.models.abstract.template import Template
+    from autograder.models.criteria_tree import CriteriaTree, CategoryNode
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Test 1: GradeStep with Template input (single mode)
+    print("\n1. Testing with Template input (single submission mode):")
+    grade_step_single = GradeStep(
+        criteria_json=criteria,
+        submission_files=submission,
+        submission_id="test_single"
+    )
+
+    # Create a mock template
+    class MockTemplate(Template):
+        def __init__(self):
+            self.name = "mock_template"
+            self.tests = {}
+
+        def get_test(self, test_name):
+            # Return a mock test function
+            def mock_test(*args, **kwargs):
+                return {"passed": True, "score": 100}
+            return mock_test
+
+    mock_template = MockTemplate()
+
+    print("  - Input type: Template")
+    print("  - Expected behavior: Grade from config (one-pass)")
+    print("  ✓ GradeStep will use grade_from_config method")
+
+    # Test 2: GradeStep with CriteriaTree input (multi mode)
+    print("\n2. Testing with CriteriaTree input (multi-submission mode):")
+    grade_step_multi = GradeStep(
+        submission_files=submission,
+        submission_id="test_multi"
+    )
+
+    # Create a mock criteria tree
+    mock_tree = CriteriaTree(
+        base=CategoryNode(name="base", weight=100),
+        bonus=None,
+        penalty=None
+    )
+
+    print("  - Input type: CriteriaTree")
+    print("  - Expected behavior: Grade from tree (reusable)")
+    print("  ✓ GradeStep will use grade_from_tree method")
+
+
+if __name__ == "__main__":
+    print("\n" + "="*80)
+    print("PIPELINE MODE TESTS")
+    print("="*80)
+
+    try:
+        test_single_submission_mode()
+        test_multi_submission_mode()
+        test_grade_step_input_detection()
+
+        print("\n" + "="*80)
+        print("ALL TESTS PASSED ✓")
+        print("="*80)
+        print("\nSummary:")
+        print("  ✓ Single submission mode: Grades directly from config")
+        print("  ✓ Multi-submission mode: Builds tree once, grades multiple times")
+        print("  ✓ GradeStep correctly detects input type (Template vs CriteriaTree)")
+        print("  ✓ Pipeline configuration is flexible and optimized")
+
+    except AssertionError as e:
+        print(f"\n❌ TEST FAILED: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
new file mode 100644
index 0000000..638c31d
--- /dev/null
+++ b/tests/unit/test_pipeline_steps.py
@@ -0,0 +1,331 @@
+"""
+Unit tests for BuildTreeStep and GradeStep.
+
+These tests verify:
+1. BuildTreeStep correctly builds a CriteriaTree from config
+2. GradeStep intelligently handles both CriteriaTree and Template inputs
+3. Single vs multi-submission pipeline modes work correctly
+"""
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from autograder.steps.build_tree_step import BuildTreeStep
+from autograder.steps.grade_step import GradeStep
+from autograder.models.dataclass.criteria_config import CriteriaConfig
+from autograder.models.dataclass.step_result import StepStatus
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.test_result import TestResult
+
+
+# Mock Template and TestFunction for testing
+class MockTestFunction(TestFunction):
+    """Mock test function that always passes."""
+
+    def __init__(self, test_name):
+        self._test_name = test_name
+
+    @property
+    def name(self):
+        return self._test_name
+
+    @property
+    def description(self):
+        return f"Mock test function: {self._test_name}"
+
+    def execute(self, *args, **kwargs):
+        """Always return a passing result."""
+        return TestResult(
+            test_name=self._test_name,
+            passed=True,
+            score=100.0,
+            max_score=100.0,
+            message="Test passed (mock)"
+        )
+
+
+class MockTemplate(Template):
+    """Mock template with pre-defined test functions."""
+
+    def __init__(self):
+        self.name = "mock_template"
+        self._tests = {
+            "expect_output": MockTestFunction("expect_output"),
+            "check_file": MockTestFunction("check_file"),
+            "validate_input": MockTestFunction("validate_input")
+        }
+
+    @property
+    def template_name(self):
+        """Get template name."""
+        return "mock_template"
+
+    @property
+    def template_description(self):
+        """Get template description."""
+        return "Mock template for testing purposes"
+
+    @property
+    def requires_pre_executed_tree(self) -> bool:
+        """Mock templates don't require pre-executed trees."""
+        return False
+
+    @property
+    def requires_execution_helper(self) -> bool:
+        """Mock templates don't require execution helpers."""
+        return False
+
+    @property
+    def execution_helper(self):
+        """No execution helper needed for mocks."""
+        return None
+
+    def stop(self):
+        """No cleanup needed for mock templates."""
+        pass
+
+    def get_test(self, test_name: str):
+        """Get a test function by name."""
+        return self._tests.get(test_name)
+
+    def get_available_tests(self):
+        """Get list of available test names."""
+        return list(self._tests.keys())
+
+
+def create_simple_criteria():
+    """Create a simple criteria configuration for testing."""
+    return {
+        "test_library": "input_output",
+        "base": {
+            "weight": 100,
+            "subjects": [
+                {
+                    "subject_name": "Basic Tests",
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "expect_output",
+                            "file": "main.py",
+                            "parameters": [
+                                {"name": "stdin_input", "value": ["hello"]},
+                                {"name": "expected_output", "value": "hello"}
+                            ]
+                        },
+                        {
+                            "name": "expect_output",
+                            "file": "main.py",
+                            "parameters": [
+                                {"name": "stdin_input", "value": ["world"]},
+                                {"name": "expected_output", "value": "world"}
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        "bonus": {
+            "weight": 10,
+            "tests": [
+                {
+                    "name": "expect_output",
+                    "file": "main.py",
+                    "parameters": [
+                        {"name": "stdin_input", "value": ["bonus"]},
+                        {"name": "expected_output", "value": "bonus"}
+                    ]
+                }
+            ]
+        }
+    }
+
+
+def create_mock_submission():
+    """Create mock submission files."""
+    return {
+        "main.py": "# Simple echo program\nprint(input())"
+    }
+
+
+def test_build_tree_step():
+    """Test that BuildTreeStep correctly builds a CriteriaTree."""
+    print("\n" + "="*80)
+    print("TEST: BuildTreeStep")
+    print("="*80)
+
+    # Create criteria and template
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+
+    # Create and execute step
+    build_step = BuildTreeStep(criteria)
+    result = build_step.execute(template)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Build step failed: {result.error}"
+    assert result.data is not None, "CriteriaTree is None"
+
+    criteria_tree = result.data
+
+    # Verify tree structure
+    assert criteria_tree.base is not None, "Base category missing"
+    assert criteria_tree.bonus is not None, "Bonus category missing"
+
+    print("✓ BuildTreeStep successfully built CriteriaTree")
+    print(f"  - Base category: {criteria_tree.base.name}")
+    print(f"  - Bonus category: {criteria_tree.bonus.name}")
+
+    # Print tree structure
+    print("\nCriteria Tree Structure:")
+    criteria_tree.print_tree()
+
+    return criteria_tree
+
+
+def test_grade_from_tree():
+    """Test that GradeStep can grade from a CriteriaTree."""
+    print("\n" + "="*80)
+    print("TEST: GradeStep with CriteriaTree (Multi-Submission Mode)")
+    print("="*80)
+
+    # Build criteria tree first
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+    build_step = BuildTreeStep(criteria)
+    build_result = build_step.execute(template)
+
+    criteria_tree = build_result.data
+    submission_files = create_mock_submission()
+
+    # Create and execute grade step
+    grade_step = GradeStep(
+        submission_files=submission_files,
+        submission_id="test_submission_1"
+    )
+
+    result = grade_step.execute(criteria_tree)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
+    assert result.data is not None, "GradingResult is None"
+
+    grading_result = result.data
+
+    print("✓ GradeStep successfully graded from CriteriaTree")
+    print(f"  - Final Score: {grading_result.final_score}")
+    print(f"  - Status: {grading_result.status}")
+
+    # Print result tree
+    if grading_result.result_tree:
+        print("\nResult Tree:")
+        grading_result.result_tree.print_tree()
+
+    return grading_result
+
+
+def test_grade_from_config():
+    """Test that GradeStep can grade directly from config (single submission mode)."""
+    print("\n" + "="*80)
+    print("TEST: GradeStep with Template (Single Submission Mode)")
+    print("="*80)
+
+    # Create criteria and template
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+    submission_files = create_mock_submission()
+
+    # Create and execute grade step (without building tree first)
+    grade_step = GradeStep(
+        criteria_json=criteria,
+        submission_files=submission_files,
+        submission_id="test_submission_2"
+    )
+
+    result = grade_step.execute(template)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
+    assert result.data is not None, "GradingResult is None"
+
+    grading_result = result.data
+
+    print("✓ GradeStep successfully graded from config")
+    print(f"  - Final Score: {grading_result.final_score}")
+    print(f"  - Status: {grading_result.status}")
+
+    # Print result tree
+    if grading_result.result_tree:
+        print("\nResult Tree:")
+        grading_result.result_tree.print_tree()
+
+    return grading_result
+
+
+def test_invalid_input_type():
+    """Test that GradeStep rejects invalid input types."""
+    print("\n" + "="*80)
+    print("TEST: GradeStep with Invalid Input Type")
+    print("="*80)
+
+    submission_files = create_mock_submission()
+
+    grade_step = GradeStep(
+        submission_files=submission_files,
+        submission_id="test_submission_3"
+    )
+
+    # Try to execute with invalid input (string)
+    result = grade_step.execute("invalid input")
+
+    # Verify it fails gracefully
+    assert result.status == StepStatus.FAIL, "Should fail with invalid input"
+    assert result.error is not None, "Should have error message"
+
+    print("✓ GradeStep correctly rejected invalid input")
+    print(f"  - Error: {result.error}")
+
+
+def run_all_tests():
+    """Run all unit tests."""
+    print("\n" + "#"*80)
+    print("# RUNNING PIPELINE STEPS UNIT TESTS")
+    print("#"*80)
+
+    try:
+        # Test 1: Build tree
+        criteria_tree = test_build_tree_step()
+
+        # Test 2: Grade from tree (multi-submission mode)
+        grading_result_tree = test_grade_from_tree()
+
+        # Test 3: Grade from config (single submission mode)
+        grading_result_config = test_grade_from_config()
+
+        # Test 4: Invalid input handling
+        test_invalid_input_type()
+
+        print("\n" + "#"*80)
+        print("# ALL TESTS PASSED! ✓")
+        print("#"*80)
+
+    except AssertionError as e:
+        print("\n" + "#"*80)
+        print(f"# TEST FAILED: {e}")
+        print("#"*80)
+        raise
+    except Exception as e:
+        print("\n" + "#"*80)
+        print(f"# UNEXPECTED ERROR: {e}")
+        print("#"*80)
+        import traceback
+        traceback.print_exc()
+        raise
+
+
+if __name__ == "__main__":
+    run_all_tests()
+

From fc7f6af31e94094e96cb574bb40fe4d4a39c52c5 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Sat, 3 Jan 2026 12:10:48 -0300
Subject: [PATCH 11/49] feat: tree grading

config grading WIP
---
 autograder/graders/__init__.py               |   0
 autograder/graders/criteria_tree.py          | 131 ++++++
 autograder/models/abstract/template.py       |  16 +-
 autograder/models/criteria_tree.py           |   7 +-
 autograder/parsers/criteria_tree.py          | 124 +++---
 autograder/services/criteria_tree_service.py |  53 +--
 autograder/services/grader_service.py        | 406 +------------------
 autograder/steps/grade_step.py               |  41 +-
 8 files changed, 222 insertions(+), 556 deletions(-)
 create mode 100644 autograder/graders/__init__.py
 create mode 100644 autograder/graders/criteria_tree.py

diff --git a/autograder/graders/__init__.py b/autograder/graders/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/autograder/graders/criteria_tree.py b/autograder/graders/criteria_tree.py
new file mode 100644
index 0000000..b7fed07
--- /dev/null
+++ b/autograder/graders/criteria_tree.py
@@ -0,0 +1,131 @@
+import logging
+from typing import Dict, Optional, Sequence, override
+from autograder.models.config.criteria import CriteriaConfig
+from autograder.models.criteria_tree import (
+    CategoryNode,
+    CriteriaTree,
+    SubjectNode,
+    TestNode,
+)
+from autograder.models.result_tree import (
+    NodeType,
+    ResultNode,
+    ResultTree,
+    TestResultNode,
+)
+from autograder.utils.processers.criteria_tree import CriteriaTreeProcesser
+
+
+class CriteriaTreeGrader(CriteriaTreeProcesser):
+    def __init__(self, submission_files: Dict) -> None:
+        self.logger = logging.getLogger("GraderService")
+        self.__submission_files = submission_files
+
+    def __balance_nodes(self, nodes: Sequence[ResultNode], factor: float) -> None:
+        if len(nodes) == 0:
+            return
+
+        total_weight = sum(node.weight for node in nodes) * factor
+
+        if total_weight == 0:
+            equal_weight = 100.0 / len(nodes)
+            for node in nodes:
+                node.weight = equal_weight
+        elif total_weight != 100:
+            scale_factor = 100.0 / total_weight
+            for node in nodes:
+                node.weight *= scale_factor
+
+    def __process_holder(self, holder: CategoryNode | SubjectNode) -> ResultNode:
+        result = ResultNode(
+            name=holder.name,
+            node_type=NodeType.CATEGORY
+            if isinstance(holder, CategoryNode)
+            else NodeType.SUBJECT,
+            weight=holder.weight,
+        )
+        if holder.subjects and holder.tests:
+            if not holder.subjects_weight:
+                raise ValueError(f"missing 'subjects_weight' for {holder.name}")
+            factor = holder.subjects_weight / 100.0
+        else:
+            factor = 1.0
+
+        if holder.subjects:
+            subject_results = [
+                self.process_subject(inner_subject) for inner_subject in holder.subjects
+            ]
+            self.__balance_nodes(subject_results, factor)
+
+        if holder.tests:
+            test_results = [self.process_test(test) for test in holder.tests]
+            self.__balance_nodes(test_results, factor)
+
+        return result
+
+    @override
+    def process_subject(self, subject: SubjectNode) -> ResultNode:
+        return self.__process_holder(subject)
+
+    @override
+    def process_test(self, test: TestNode) -> TestResultNode:
+        test_result = TestResultNode(
+            name=test.name,
+            node_type=NodeType.TEST,
+            weight=100.0,
+            test_name=test.name,
+            test_function=test.test_function,
+            test_params=test.parameters,
+            file_target=test.file_target,
+        )
+        test_result.execute(self.__submission_files)
+        return test_result
+
+    @override
+    def process_category(self, category: CategoryNode) -> ResultNode:
+        return self.__process_holder(category)
+
+    def __find_first_test(self, node: CategoryNode | SubjectNode) -> Optional[TestNode]:
+        """Find the first test node in the tree."""
+        if isinstance(node, TestNode):
+            return node
+
+        if hasattr(node, "tests") and node.tests:
+            return node.tests[0]
+
+        if hasattr(node, "subjects") and node.subjects:
+            for subject in node.subjects:
+                result = self.__find_first_test(subject)
+                if result:
+                    return result
+
+        return None
+
+    def grade(self, tree: CriteriaTree, submission_id: Optional[str]) -> ResultTree:
+        self.logger.info(f"Grading from tree for submission: {submission_id}")
+
+        root = ResultNode(name="root", node_type=NodeType.CATEGORY, weight=100.0)
+
+        base_result = self.process_category(tree.base)
+        root.children.append(base_result)
+
+        if tree.bonus:
+            bonus_result = self.process_category(tree.bonus)
+            root.children.append(bonus_result)
+
+        if tree.penalty:
+            penalty_result = self.process_category(tree.penalty)
+            root.children.append(penalty_result)
+
+        result_tree = ResultTree(root, submission_id)
+
+        # Handle AI executor batch if needed
+        # Note: For tree-based grading, the template is embedded in test nodes
+        first_test = self.__find_first_test(tree.base)
+        if first_test and hasattr(first_test, "test_function"):
+            test_func = first_test.test_function
+            if hasattr(test_func, "executor") and test_func.executor:
+                self.logger.info("Executing AI batch requests")
+                test_func.executor.stop()
+
+        return result_tree
diff --git a/autograder/models/abstract/template.py b/autograder/models/abstract/template.py
index 1ac30b1..a0962ed 100644
--- a/autograder/models/abstract/template.py
+++ b/autograder/models/abstract/template.py
@@ -1,13 +1,17 @@
 from abc import ABC, abstractmethod
 
-class Template(ABC):
+from autograder.models.abstract.test_function import TestFunction
+
 
+class Template(ABC):
     def __init__(self):
         self.tests = None
+
     @property
     @abstractmethod
     def template_name(self) -> str:
         pass
+
     @property
     @abstractmethod
     def template_description(self) -> str:
@@ -28,14 +32,14 @@ def requires_execution_helper(self) -> bool:
     def execution_helper(self):
         pass
 
+    @abstractmethod
+    def get_test(self, name: str) -> TestFunction:
+        pass
+
     @abstractmethod
     def stop(self):
         pass
+
     def get_tests(self):
         return self.tests
 
-
-
-
-
-    
\ No newline at end of file
diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py
index 94370a4..dcf2b5f 100644
--- a/autograder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -29,8 +29,6 @@ class TestNode:
     test_function: Any  # TestFunction instance from template
     parameters: List[Any] = field(default_factory=list)
     file_target: Optional[str] = None
-    category_name: str = ""
-    subject_name: str = ""
     weight: float = 100.0
 
     def __repr__(self):
@@ -53,6 +51,7 @@ class SubjectNode:
     weight: float
     subjects: List["SubjectNode"] = field(default_factory=list)
     tests: List[TestNode] = field(default_factory=list)
+    subjects_weight: Optional[float] = None
 
     def __repr__(self):
         if self.subjects:
@@ -82,6 +81,7 @@ class CategoryNode:
     weight: float
     subjects: List[SubjectNode] = field(default_factory=list)
     tests: List[TestNode] = field(default_factory=list)
+    subjects_weight: Optional[float] = None
 
     def __repr__(self):
         if self.subjects:
@@ -90,6 +90,9 @@ def __repr__(self):
             f"CategoryNode({self.name}, weight={self.weight}, tests={len(self.tests)})"
         )
 
+    def add_subjects(self, subjects: List[SubjectNode]) -> None:
+        self.subjects.extend(subjects)
+
     def get_all_tests(self) -> List[TestNode]:
         """Recursively collect all test nodes under this category."""
         tests = []
diff --git a/autograder/parsers/criteria_tree.py b/autograder/parsers/criteria_tree.py
index 248da00..da79b2e 100644
--- a/autograder/parsers/criteria_tree.py
+++ b/autograder/parsers/criteria_tree.py
@@ -1,31 +1,31 @@
-from typing import Any, Dict, List, Optional, override
+from typing import Dict, List, Optional, override
 
+from autograder.models.abstract import test_function
 from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.config.category import CategoryConfig
 from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.config.subject import SubjectConfig
 from autograder.models.config.test import TestConfig
 from autograder.models.criteria_tree import (
     CriteriaTree,
-    Subject,
-    Test,
-    TestCall,
-    TestCategory,
+    SubjectNode,
+    TestNode,
+    CategoryNode,
 )
-from autograder.models.dataclass.test_result import TestResult
 
 
 class CriteriaTreeParser:
-    def __parse_subjects(self, configs: Dict[str, SubjectConfig]) -> List[Subject]:
-        subjects = [
-            self.__parse_subject(s_name, s_data) for s_name, s_data in configs.items()
-        ]
+    def __init__(self, template: Template) -> None:
+        self.__template: Template = template
+
+    def __parse_subjects(self, configs: List[SubjectConfig]) -> List[SubjectNode]:
+        subjects = [self.__parse_subject(config) for config in configs]
         self.__balance_subject_weights(subjects)
         return subjects
 
-    def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
-        subject = Subject(name)
-        if config.weight:
-            subject.weight = config.weight
+    def __parse_subject(self, config: SubjectConfig) -> SubjectNode:
+        subject = SubjectNode(config.subject_name, config.weight)
 
         if config.subjects_weight:
             subject.subjects_weight = config.subjects_weight
@@ -38,40 +38,39 @@ def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
 
         return subject
 
-    def __balance_subject_weights(self, subjects: List[Subject]) -> None:
+    def __balance_subject_weights(self, subjects: List[SubjectNode]) -> None:
         total_weight = sum(s.weight for s in subjects)
         if total_weight > 0 and total_weight != 100:
             scaling_factor = 100 / total_weight
             for subject in subjects:
                 subject.weight = round(subject.weight * scaling_factor)
 
-    def __parse_tests(self, tests_data: List[TestConfig | str]) -> List[Test]:
-        return [self.__parse_test(test_item) for test_item in tests_data]
-
-    def __parse_test(self, test_item: TestConfig | str) -> Test:
-        if isinstance(test_item, str):
-            test_name = test_item
-            test_file = None
-            calls = None
-        elif isinstance(test_item, TestConfig):
-            test_name = test_item.name
-            test_file = test_item.file
-            calls = test_item.calls
-
-        test = Test(test_name, test_file)
-        if calls is not None:
-            for call_args in calls:
-                test.add_call(TestCall(call_args))
-        else:
-            test.add_call(TestCall([]))
+    def __parse_tests(self, test_configs: List[TestConfig]) -> List[TestNode]:
+        return [self.__parse_test(test_item) for test_item in test_configs]
 
-        return test
+    def __find_test_function(self, name: str) -> Optional[TestFunction]:
+        try:
+            return self.__template.get_test(name)
+        except (AttributeError, KeyError):
+            return None
 
-    def __parse_category(self, category_name, config: SubjectConfig) -> TestCategory:
-        category = TestCategory(category_name)
+    def __parse_test(self, config: TestConfig) -> TestNode:
+        test_function = self.__find_test_function(config.name)
+        if not test_function:
+            raise ValueError(f"Couldn't find test {config.name}")
+
+        test = TestNode(
+            config.name,
+            config.name,
+            test_function,
+            config.get_args_list() or list(),
+            config.file,
+        )
+
+        return test
 
-        if config.weight:
-            category.max_score = config.weight
+    def __parse_category(self, category_name, config: CategoryConfig) -> CategoryNode:
+        category = CategoryNode(category_name, config.weight)
 
         if config.subjects:
             category.add_subjects(self.__parse_subjects(config.subjects))
@@ -81,45 +80,14 @@ def __parse_category(self, category_name, config: SubjectConfig) -> TestCategory
 
         return category
 
-    def parse_tree(self, tree_data: Dict[str, Any]) -> CriteriaTree:
-        tree = CriteriaTree()
-        config = CriteriaConfig(**tree_data)
+    def parse_tree(self, config: CriteriaConfig) -> CriteriaTree:
+        base_category = self.__parse_category("base", config.base)
+        tree = CriteriaTree(base_category)
 
-        for category_name in ["base", "bonus", "penalty"]:
-            category_data = getattr(config, category_name)
-            if category_data is None:
-                continue
-            parsed_category = self.__parse_category(category_name, category_data)
-            setattr(tree, category_name, parsed_category)
+        if config.bonus:
+            tree.bonus = self.__parse_category("bonus", config.bonus)
 
-        return tree
-
-
-class PreExecutedTreeParser(CriteriaTreeParser):
-    def __init__(self, template: Template, submission_files: Dict[str, str]) -> None:
-        self.__template: Template = template
-        self.__submission_files = submission_files
-        self.__current_subject_name: Optional[str] = None
-
-    @override
-    def __parse_subject(self, name: str, config: SubjectConfig) -> Subject:
-        self.__current_subject_name = name
-        subject = super().__parse_subject(name, config)
-        self.__current_subject_name = None
-        return subject
+        if config.penalty:
+            tree.penalty = self.__parse_category("penalty", config.penalty)
 
-    @override
-    def __parse_tests(self, tests_data: List[TestConfig | str]) -> List[TestResult]:
-        subject_name = self.__current_subject_name
-        if subject_name is None:
-            raise ValueError(
-                "Failed to get subject_name during pre executed tree parsing"
-            )
-        tests = super().__parse_tests(tests_data)
-        result = []
-        for test in tests:
-            executed_tests = test.get_result(
-                self.__template, self.__submission_files, subject_name
-            )
-            result.extend(executed_tests)
-        return result
+        return tree
diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py
index fff10fb..4fa87a6 100644
--- a/autograder/services/criteria_tree_service.py
+++ b/autograder/services/criteria_tree_service.py
@@ -4,6 +4,7 @@
 from autograder.models.abstract.template import Template
 from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.criteria_tree import CriteriaTree
+from autograder.parsers.criteria_tree import CriteriaTreeParser
 
 
 class CriteriaTreeService:
@@ -40,54 +41,8 @@ def build_tree(
         """
         self.logger.info("Building criteria tree")
 
+        parser = CriteriaTreeParser(template)
+        tree = parser.parse_tree(criteria_config)
+
         self.logger.info("Criteria tree built successfully")
         return tree
-
-
-class CriteriaTreeBuilder:
-    """
-    Convenience builder class for creating criteria trees.
-
-    Usage:
-        builder = CriteriaTreeBuilder()
-        tree = (builder
-            .from_dict(criteria_dict)
-            .with_template(template)
-            .build())
-    """
-
-    def __init__(self):
-        self._config: Optional[CriteriaConfig] = None
-        self._template: Optional[Template] = None
-        self._service = CriteriaTreeService()
-
-    def from_dict(self, criteria_dict: dict) -> "CriteriaTreeBuilder":
-        """Load and validate criteria from dictionary."""
-        self._config = CriteriaConfig.from_dict(criteria_dict)
-        return self
-
-    def from_json(self, criteria_json: str) -> "CriteriaTreeBuilder":
-        """Load and validate criteria from JSON string."""
-        self._config = CriteriaConfig.from_json(criteria_json)
-        return self
-
-    def with_config(self, config: CriteriaConfig) -> "CriteriaTreeBuilder":
-        """Use an already validated CriteriaConfig."""
-        self._config = config
-        return self
-
-    def with_template(self, template: Template) -> "CriteriaTreeBuilder":
-        """Set the template to use."""
-        self._template = template
-        return self
-
-    def build(self) -> CriteriaTree:
-        """Build the criteria tree."""
-        if not self._config:
-            raise ValueError(
-                "Criteria configuration not set. Use from_dict() or from_json()"
-            )
-        if not self._template:
-            raise ValueError("Template not set. Use with_template()")
-
-        return self._service.build_tree(self._config, self._template)
diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py
index a2d28ab..a976f55 100644
--- a/autograder/services/grader_service.py
+++ b/autograder/services/grader_service.py
@@ -1,414 +1,22 @@
-"""
-Enhanced GraderService - can build result trees from CriteriaTree or raw config.
-
-This service handles two grading flows:
-1. Single submission: Build result tree directly from criteria config (one-pass)
-2. Multiple submissions: Build result tree from pre-built criteria tree (reusable)
-"""
 import logging
-from typing import Dict, Any, Optional, List
 
-from autograder.models.criteria_tree import CriteriaTree, CategoryNode, SubjectNode, TestNode
-from autograder.models.result_tree import ResultTree, ResultNode, TestResultNode, NodeType
-from autograder.models.abstract.template import Template
-from autograder.models.dataclass.criteria_config import CriteriaConfig
+from typing import Dict, Any, Optional
+from autograder.graders.criteria_tree import CriteriaTreeGrader
+from autograder.models.criteria_tree import CriteriaTree
+from autograder.models.result_tree import ResultTree
 from autograder.services.criteria_tree_service import CriteriaTreeService
 
 
 class GraderService:
-    """
-    Service for executing grading and building result trees.
-
-    Supports two modes:
-    1. Direct grading: Build result tree from criteria config (single submission)
-    2. Tree-based grading: Build result tree from criteria tree (multiple submissions)
-    """
-
     def __init__(self):
         self.logger = logging.getLogger("GraderService")
         self._criteria_service = CriteriaTreeService()
 
-    def grade_from_config(
-        self,
-        criteria_config: CriteriaConfig,
-        template: Template,
-        submission_files: Dict[str, Any],
-        submission_id: Optional[str] = None
-    ) -> ResultTree:
-        """
-        Grade a submission directly from criteria configuration (one-pass).
-
-        This is optimized for single submissions - builds and executes in one traversal.
-
-        Args:
-            criteria_config: Validated criteria configuration
-            template: Template with test functions
-            submission_files: Student submission files
-            submission_id: Optional identifier for the submission
-
-        Returns:
-            Complete ResultTree with all tests executed
-        """
-        self.logger.info(f"Grading from config for submission: {submission_id}")
-
-        # Build root result node
-        root = ResultNode(
-            name="root",
-            node_type=NodeType.CATEGORY,
-            weight=100.0
-        )
-
-        # Build and execute base category (required)
-        base_result = self._build_and_execute_category(
-            "base",
-            criteria_config.base,
-            template,
-            submission_files
-        )
-        root.children.append(base_result)
-
-        # Build and execute bonus category (optional)
-        if criteria_config.bonus:
-            bonus_result = self._build_and_execute_category(
-                "bonus",
-                criteria_config.bonus,
-                template,
-                submission_files
-            )
-            root.children.append(bonus_result)
-
-        # Build and execute penalty category (optional)
-        if criteria_config.penalty:
-            penalty_result = self._build_and_execute_category(
-                "penalty",
-                criteria_config.penalty,
-                template,
-                submission_files
-            )
-            root.children.append(penalty_result)
-
-        # Create result tree and calculate scores
-        result_tree = ResultTree(
-            root=root,
-            submission_id=submission_id,
-            template_name=template.name if hasattr(template, 'name') else None
-        )
-
-        # Handle AI executor batch if needed
-        if hasattr(template, 'execution_helper') and template.execution_helper:
-            self.logger.info("Executing AI batch requests")
-            template.execution_helper.stop()
-
-        # Calculate final scores
-        final_score = result_tree.calculate_final_score()
-        self.logger.info(f"Grading complete. Final score: {final_score}")
-
-        return result_tree
-
     def grade_from_tree(
         self,
         criteria_tree: CriteriaTree,
         submission_files: Dict[str, Any],
-        submission_id: Optional[str] = None
+        submission_id: Optional[str] = None,
     ) -> ResultTree:
-        """
-        Grade a submission using a pre-built criteria tree.
-
-        This is optimized for multiple submissions - reuses the same criteria tree.
-
-        Args:
-            criteria_tree: Pre-built criteria tree with test functions
-            submission_files: Student submission files
-            submission_id: Optional identifier for the submission
-
-        Returns:
-            Complete ResultTree with all tests executed
-        """
-        self.logger.info(f"Grading from tree for submission: {submission_id}")
-
-        # Build root result node
-        root = ResultNode(
-            name="root",
-            node_type=NodeType.CATEGORY,
-            weight=100.0
-        )
-
-        # Execute base category
-        if criteria_tree.base:
-            base_result = self._execute_category(
-                criteria_tree.base,
-                submission_files
-            )
-            root.children.append(base_result)
-
-        # Execute bonus category
-        if criteria_tree.bonus:
-            bonus_result = self._execute_category(
-                criteria_tree.bonus,
-                submission_files
-            )
-            root.children.append(bonus_result)
-
-        # Execute penalty category
-        if criteria_tree.penalty:
-            penalty_result = self._execute_category(
-                criteria_tree.penalty,
-                submission_files
-            )
-            root.children.append(penalty_result)
-
-        # Create result tree
-        result_tree = ResultTree(
-            root=root,
-            submission_id=submission_id
-        )
-
-        # Handle AI executor batch if needed
-        # Note: For tree-based grading, the template is embedded in test nodes
-        first_test = self._find_first_test(criteria_tree.base)
-        if first_test and hasattr(first_test, 'test_function'):
-            test_func = first_test.test_function
-            if hasattr(test_func, 'executor') and test_func.executor:
-                self.logger.info("Executing AI batch requests")
-                test_func.executor.stop()
-
-        # Calculate final scores
-        final_score = result_tree.calculate_final_score()
-        self.logger.info(f"Grading complete. Final score: {final_score}")
-
-        return result_tree
-
-    def _build_and_execute_category(
-        self,
-        category_name: str,
-        category_config,
-        template: Template,
-        submission_files: Dict[str, Any]
-    ) -> ResultNode:
-        """Build and execute a category in one pass."""
-        category_result = ResultNode(
-            name=category_name,
-            node_type=NodeType.CATEGORY,
-            weight=category_config.weight
-        )
-
-        # Category has either subjects or tests
-        if category_config.subjects:
-            # Subjects are now an array with subject_name field
-            for subject_config in category_config.subjects:
-                subject_result = self._build_and_execute_subject(
-                    subject_config.subject_name,
-                    subject_config,
-                    template,
-                    submission_files,
-                    category_name
-                )
-                category_result.children.append(subject_result)
-
-            # Balance weights
-            self._balance_weights(category_result.children)
-
-        elif category_config.tests:
-            test_results = self._build_and_execute_tests(
-                category_config.tests,
-                template,
-                submission_files,
-                category_name,
-                category_name
-            )
-            category_result.children.extend(test_results)
-
-        return category_result
-
-    def _build_and_execute_subject(
-        self,
-        subject_name: str,
-        subject_config,
-        template: Template,
-        submission_files: Dict[str, Any],
-        category_name: str
-    ) -> ResultNode:
-        """Recursively build and execute a subject in one pass."""
-        subject_result = ResultNode(
-            name=subject_name,
-            node_type=NodeType.SUBJECT,
-            weight=subject_config.weight
-        )
-
-        # Subject has either nested subjects or tests
-        if subject_config.subjects:
-            # Subjects are now an array with subject_name field
-            for child_config in subject_config.subjects:
-                child_result = self._build_and_execute_subject(
-                    child_config.subject_name,
-                    child_config,
-                    template,
-                    submission_files,
-                    category_name
-                )
-                subject_result.children.append(child_result)
-
-            # Balance weights
-            self._balance_weights(subject_result.children)
-
-        elif subject_config.tests:
-            test_results = self._build_and_execute_tests(
-                subject_config.tests,
-                template,
-                submission_files,
-                category_name,
-                subject_name
-            )
-            subject_result.children.extend(test_results)
-
-        return subject_result
-
-    def _build_and_execute_tests(
-        self,
-        test_configs: List,
-        template: Template,
-        submission_files: Dict[str, Any],
-        category_name: str,
-        subject_name: str
-    ) -> List[TestResultNode]:
-        """Build and execute test nodes."""
-        test_results = []
-
-        for test_index, test_config in enumerate(test_configs):
-            # Find test function
-            test_function = template.get_test(test_config.name)
-
-            if not test_function:
-                raise ValueError(
-                    f"Test '{test_config.name}' not found in template"
-                )
-
-            # Convert named parameters to args list
-            params = test_config.get_args_list() if test_config.parameters else []
-
-            # Create and execute test node
-            test_node = TestResultNode(
-                name=f"{test_config.name}_{test_index}",
-                node_type=NodeType.TEST,
-                weight=100.0,  # Will be balanced
-                test_name=test_config.name,
-                test_function=test_function,
-                test_params=params,
-                file_target=test_config.file
-            )
-
-            # Execute test
-            test_node.execute(submission_files)
-            test_results.append(test_node)
-
-        # Balance weights
-        if test_results:
-            self._balance_weights(test_results)
-
-        return test_results
-
-    def _execute_category(
-        self,
-        category_node: CategoryNode,
-        submission_files: Dict[str, Any]
-    ) -> ResultNode:
-        """Execute a category from criteria tree."""
-        category_result = ResultNode(
-            name=category_node.name,
-            node_type=NodeType.CATEGORY,
-            weight=category_node.weight
-        )
-
-        # Execute subjects
-        if hasattr(category_node, 'subjects') and category_node.subjects:
-            for subject in category_node.subjects:
-                subject_result = self._execute_subject(subject, submission_files)
-                category_result.children.append(subject_result)
-
-        # Execute tests
-        if hasattr(category_node, 'tests') and category_node.tests:
-            for test in category_node.tests:
-                test_result = self._execute_test(test, submission_files)
-                category_result.children.append(test_result)
-
-        return category_result
-
-    def _execute_subject(
-        self,
-        subject_node: SubjectNode,
-        submission_files: Dict[str, Any]
-    ) -> ResultNode:
-        """Execute a subject from criteria tree."""
-        subject_result = ResultNode(
-            name=subject_node.name,
-            node_type=NodeType.SUBJECT,
-            weight=subject_node.weight
-        )
-
-        # Execute nested subjects
-        if hasattr(subject_node, 'subjects') and subject_node.subjects:
-            for child in subject_node.subjects:
-                child_result = self._execute_subject(child, submission_files)
-                subject_result.children.append(child_result)
-
-        # Execute tests
-        if hasattr(subject_node, 'tests') and subject_node.tests:
-            for test in subject_node.tests:
-                test_result = self._execute_test(test, submission_files)
-                subject_result.children.append(test_result)
-
-        return subject_result
-
-    def _execute_test(
-        self,
-        test_node: TestNode,
-        submission_files: Dict[str, Any]
-    ) -> TestResultNode:
-        """Execute a single test from criteria tree."""
-        result_node = TestResultNode(
-            name=test_node.name,
-            node_type=NodeType.TEST,
-            weight=test_node.weight,
-            test_name=test_node.test_name,
-            test_function=test_node.test_function,
-            test_params=test_node.parameters,
-            file_target=test_node.file_target
-        )
-
-        # Execute the test
-        result_node.execute(submission_files)
-
-        return result_node
-
-    def _balance_weights(self, nodes: List[ResultNode]) -> None:
-        """Balance weights of sibling nodes to sum to 100."""
-        if not nodes:
-            return
-
-        total_weight = sum(node.weight for node in nodes)
-
-        if total_weight == 0:
-            equal_weight = 100.0 / len(nodes)
-            for node in nodes:
-                node.weight = equal_weight
-        elif total_weight != 100:
-            scale_factor = 100.0 / total_weight
-            for node in nodes:
-                node.weight *= scale_factor
-
-    def _find_first_test(self, node) -> Optional[TestNode]:
-        """Find the first test node in the tree."""
-        if isinstance(node, TestNode):
-            return node
-
-        if hasattr(node, 'tests') and node.tests:
-            return node.tests[0]
-
-        if hasattr(node, 'subjects') and node.subjects:
-            for subject in node.subjects:
-                result = self._find_first_test(subject)
-                if result:
-                    return result
-
-        return None
-
+        grader = CriteriaTreeGrader(submission_files)
+        return grader.grade(criteria_tree, submission_id)
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index 396c68b..24b41cc 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -1,4 +1,4 @@
-from typing import Dict, Any, Union
+from typing import Dict, Any, Optional, Union
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.dataclass.step_result import StepResult, StepStatus
@@ -17,7 +17,12 @@ class GradeStep(Step):
     - If input is Template: Use grade_from_config (for single submission)
     """
 
-    def __init__(self, criteria_json: dict = None, submission_files: Dict[str, Any] = None, submission_id: str = None):
+    def __init__(
+        self,
+        submission_files: Dict[str, Any],
+        submission_id: Optional[str],
+        criteria_json: Optional[Dict] = None,
+    ):
         """
         Initialize the grade step.
 
@@ -31,7 +36,9 @@ def __init__(self, criteria_json: dict = None, submission_files: Dict[str, Any]
         self._submission_id = submission_id
         self._grader_service = GraderService()
 
-    def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingResult]:
+    def execute(
+        self, input: Union[CriteriaTree, Template]
+    ) -> StepResult[GradingResult]:
         """
         Grade a submission based on the input type.
 
@@ -48,12 +55,14 @@ def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingRes
                 result_tree = self._grader_service.grade_from_tree(
                     criteria_tree=input,
                     submission_files=self._submission_files,
-                    submission_id=self._submission_id
+                    submission_id=self._submission_id,
                 )
             elif isinstance(input, Template):
                 # Single submission mode: grade directly from config
                 if not self._criteria_json:
-                    raise ValueError("criteria_json is required when grading from template")
+                    raise ValueError(
+                        "criteria_json is required when grading from template"
+                    )
 
                 # Validate criteria configuration
                 criteria_config = CriteriaConfig.from_dict(self._criteria_json)
@@ -63,27 +72,18 @@ def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingRes
                     criteria_config=criteria_config,
                     template=input,
                     submission_files=self._submission_files,
-                    submission_id=self._submission_id
-                )
-            else:
-                raise ValueError(
-                    f"Invalid input type for GradeStep: {type(input).__name__}. "
-                    f"Expected CriteriaTree or Template"
+                    submission_id=self._submission_id,
                 )
 
             # Create grading result
             final_score = result_tree.calculate_final_score()
 
             grading_result = GradingResult(
-                final_score=final_score,
-                status="success",
-                result_tree=result_tree
+                final_score=final_score, status="success", result_tree=result_tree
             )
 
             return StepResult(
-                data=grading_result,
-                status=StepStatus.SUCCESS,
-                original_input=input
+                data=grading_result, status=StepStatus.SUCCESS, original_input=input
             )
 
         except Exception as e:
@@ -92,7 +92,7 @@ def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingRes
                 final_score=0.0,
                 status="error",
                 error=f"Grading failed: {str(e)}",
-                failed_at_step=self.__class__.__name__
+                failed_at_step=self.__class__.__name__,
             )
 
             return StepResult(
@@ -100,8 +100,5 @@ def execute(self, input: Union[CriteriaTree, Template]) -> StepResult[GradingRes
                 status=StepStatus.FAIL,
                 error=str(e),
                 failed_at_step=self.__class__.__name__,
-                original_input=input
+                original_input=input,
             )
-
-
-

From 0a18e1d93f890d61029f729ee00fad82bdfbc0ba Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 13:02:13 -0300
Subject: [PATCH 12/49] fix: removed grade_from_config

---
 autograder/autograder.py                      | 48 +++++++------------
 autograder/services/criteria_tree_service.py  |  3 +-
 autograder/services/grader_service.py         |  2 +-
 autograder/{ => services}/graders/__init__.py |  0
 .../{ => services}/graders/criteria_tree.py   |  1 -
 autograder/{ => services}/parsers/__init__.py |  0
 .../{ => services}/parsers/criteria_tree.py   |  3 +-
 autograder/steps/grade_step.py                | 40 +++-------------
 8 files changed, 27 insertions(+), 70 deletions(-)
 rename autograder/{ => services}/graders/__init__.py (100%)
 rename autograder/{ => services}/graders/criteria_tree.py (98%)
 rename autograder/{ => services}/parsers/__init__.py (100%)
 rename autograder/{ => services}/parsers/criteria_tree.py (96%)

diff --git a/autograder/autograder.py b/autograder/autograder.py
index e399ef4..2065b71 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -10,16 +10,16 @@
 
 
 def build_pipeline(
-                 template_name,
-                 include_feedback,
-                 grading_criteria,
-                 feedback_config,
-                 setup_config = None,
-                 custom_template = None,
-                 feedback_mode = None,
-                 submission_files = None,
-                 submission_id = None,
-                 is_multi_submission = False):
+    template_name,
+    include_feedback,
+    grading_criteria,
+    feedback_config,
+    setup_config=None,
+    custom_template=None,
+    feedback_mode=None,
+    submission_files=None,
+    submission_id=None,
+):
     """
     Build an autograder pipeline based on configuration.
 
@@ -47,21 +47,13 @@ def build_pipeline(
     # Load template
     pipeline.add_step(TemplateLoaderStep(template_name, custom_template))
 
-    # Conditional tree building and grading based on submission count
-    if is_multi_submission:
-        # Multi-submission mode: Build tree once, then grade
-        pipeline.add_step(BuildTreeStep(grading_criteria))
-        pipeline.add_step(GradeStep(
-            submission_files=submission_files,
-            submission_id=submission_id
-        ))
-    else:
-        # Single submission mode: Grade directly from config (one-pass)
-        pipeline.add_step(GradeStep(
-            criteria_json=grading_criteria,
-            submission_files=submission_files,
-            submission_id=submission_id
-        ))
+    # Parse the criteria tree
+    pipeline.add_step(BuildTreeStep(grading_criteria))
+
+    # Grade
+    pipeline.add_step(
+        GradeStep(submission_files=submission_files, submission_id=submission_id)
+    )
 
     # Feedback generation (if configured)
     if include_feedback:
@@ -72,9 +64,3 @@ def build_pipeline(
     pipeline.add_step(ExporterStep(UpstashDriver))
 
     return pipeline
-
-
-
-
-
-
diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py
index 4fa87a6..5374414 100644
--- a/autograder/services/criteria_tree_service.py
+++ b/autograder/services/criteria_tree_service.py
@@ -1,10 +1,9 @@
 import logging
-from typing import Optional
 
 from autograder.models.abstract.template import Template
 from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.criteria_tree import CriteriaTree
-from autograder.parsers.criteria_tree import CriteriaTreeParser
+from autograder.services.parsers.criteria_tree import CriteriaTreeParser
 
 
 class CriteriaTreeService:
diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py
index a976f55..96ce68e 100644
--- a/autograder/services/grader_service.py
+++ b/autograder/services/grader_service.py
@@ -1,10 +1,10 @@
 import logging
 
 from typing import Dict, Any, Optional
-from autograder.graders.criteria_tree import CriteriaTreeGrader
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.result_tree import ResultTree
 from autograder.services.criteria_tree_service import CriteriaTreeService
+from autograder.services.graders.criteria_tree import CriteriaTreeGrader
 
 
 class GraderService:
diff --git a/autograder/graders/__init__.py b/autograder/services/graders/__init__.py
similarity index 100%
rename from autograder/graders/__init__.py
rename to autograder/services/graders/__init__.py
diff --git a/autograder/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py
similarity index 98%
rename from autograder/graders/criteria_tree.py
rename to autograder/services/graders/criteria_tree.py
index b7fed07..d1767ca 100644
--- a/autograder/graders/criteria_tree.py
+++ b/autograder/services/graders/criteria_tree.py
@@ -1,6 +1,5 @@
 import logging
 from typing import Dict, Optional, Sequence, override
-from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.criteria_tree import (
     CategoryNode,
     CriteriaTree,
diff --git a/autograder/parsers/__init__.py b/autograder/services/parsers/__init__.py
similarity index 100%
rename from autograder/parsers/__init__.py
rename to autograder/services/parsers/__init__.py
diff --git a/autograder/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py
similarity index 96%
rename from autograder/parsers/criteria_tree.py
rename to autograder/services/parsers/criteria_tree.py
index da79b2e..eb5f874 100644
--- a/autograder/parsers/criteria_tree.py
+++ b/autograder/services/parsers/criteria_tree.py
@@ -1,6 +1,5 @@
-from typing import Dict, List, Optional, override
+from typing import List, Optional
 
-from autograder.models.abstract import test_function
 from autograder.models.abstract.template import Template
 from autograder.models.abstract.test_function import TestFunction
 from autograder.models.config.category import CategoryConfig
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index 24b41cc..4719f24 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -1,10 +1,8 @@
-from typing import Dict, Any, Optional, Union
+from typing import Dict, Any, Optional
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.dataclass.step_result import StepResult, StepStatus
 from autograder.models.abstract.step import Step
-from autograder.models.abstract.template import Template
-from autograder.models.dataclass.criteria_config import CriteriaConfig
 from autograder.services.grader_service import GraderService
 
 
@@ -21,7 +19,6 @@ def __init__(
         self,
         submission_files: Dict[str, Any],
         submission_id: Optional[str],
-        criteria_json: Optional[Dict] = None,
     ):
         """
         Initialize the grade step.
@@ -31,14 +28,11 @@ def __init__(
             submission_files: Student submission files
             submission_id: Optional identifier for the submission
         """
-        self._criteria_json = criteria_json
         self._submission_files = submission_files
         self._submission_id = submission_id
         self._grader_service = GraderService()
 
-    def execute(
-        self, input: Union[CriteriaTree, Template]
-    ) -> StepResult[GradingResult]:
+    def execute(self, input: CriteriaTree) -> StepResult[GradingResult]:
         """
         Grade a submission based on the input type.
 
@@ -49,31 +43,11 @@ def execute(
             StepResult containing GradingResult with scores and result tree
         """
         try:
-            # Determine which grading method to use based on input type
-            if isinstance(input, CriteriaTree):
-                # Multi-submission mode: grade from pre-built tree
-                result_tree = self._grader_service.grade_from_tree(
-                    criteria_tree=input,
-                    submission_files=self._submission_files,
-                    submission_id=self._submission_id,
-                )
-            elif isinstance(input, Template):
-                # Single submission mode: grade directly from config
-                if not self._criteria_json:
-                    raise ValueError(
-                        "criteria_json is required when grading from template"
-                    )
-
-                # Validate criteria configuration
-                criteria_config = CriteriaConfig.from_dict(self._criteria_json)
-
-                # Grade directly from config (one-pass)
-                result_tree = self._grader_service.grade_from_config(
-                    criteria_config=criteria_config,
-                    template=input,
-                    submission_files=self._submission_files,
-                    submission_id=self._submission_id,
-                )
+            result_tree = self._grader_service.grade_from_tree(
+                criteria_tree=input,
+                submission_files=self._submission_files,
+                submission_id=self._submission_id,
+            )
 
             # Create grading result
             final_score = result_tree.calculate_final_score()

From a901d7eb2d6f0e4144e7066030dba88eb43e5264 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 13:41:59 -0300
Subject: [PATCH 13/49] fix: update imports and typings

---
 autograder/models/criteria_tree.py           |  6 ----
 autograder/utils/formatters/criteria_tree.py | 33 +++++---------------
 autograder/utils/printers/criteria_tree.py   | 14 +++++----
 autograder/utils/processers/criteria_tree.py |  8 ++---
 4 files changed, 19 insertions(+), 42 deletions(-)

diff --git a/autograder/models/criteria_tree.py b/autograder/models/criteria_tree.py
index dcf2b5f..fb6f659 100644
--- a/autograder/models/criteria_tree.py
+++ b/autograder/models/criteria_tree.py
@@ -8,7 +8,6 @@
 from typing import List, Optional, Any
 from dataclasses import dataclass, field
 
-from autograder.utils.formatters.criteria_tree import PreExecutedTreeFormatter
 from autograder.utils.printers.criteria_tree import CriteriaTreePrinter
 
 
@@ -134,8 +133,3 @@ def print_tree(self):
         """Prints a visual representation of the entire criteria tree."""
         printer = CriteriaTreePrinter()
         printer.print_tree(self)
-
-    def print_pre_executed_tree(self):
-        """Prints a visual representation of the entire pre-executed criteria tree."""
-        printer = CriteriaTreePrinter(PreExecutedTreeFormatter())
-        printer.print_tree(self)
diff --git a/autograder/utils/formatters/criteria_tree.py b/autograder/utils/formatters/criteria_tree.py
index 536fcb2..fe34f89 100644
--- a/autograder/utils/formatters/criteria_tree.py
+++ b/autograder/utils/formatters/criteria_tree.py
@@ -3,8 +3,7 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from autograder.models.dataclass.test_result import TestResult
-    from autograder.models.criteria_tree import TestCategory, Subject, Test
+    from autograder.models.criteria_tree import CategoryNode, SubjectNode, TestNode
 
 
 class CriteriaTreeFormatter(CriteriaTreeProcesser):
@@ -12,34 +11,16 @@ def header(self) -> str:
         return "🌲 Criteria Tree"
 
     @override
-    def process_test(self, test: "Test") -> List[str]:
+    def process_test(self, test: "TestNode") -> List[str]:
         result: List[str] = list()
-        result.append(f"  🧪 {test.name} (file: {test.file})")
-        for call in test.calls:
-            result.append(f"    - Parameters: {call.args}")
+        result.append(f"  🧪 {test.name} (file: {test.file_target})")
+        result.append(f"    - Parameters: {test.parameters}")
         return result
 
     @override
-    def process_subject(self, subject: "Subject") -> str:
+    def process_subject(self, subject: "SubjectNode") -> str:
         return f"📘{subject.name} (weight: {subject.weight})"
 
     @override
-    def process_category(self, category: "TestCategory") -> str:
-        return f"  📁 {category.name.upper()} (max_score: {category.max_score})"
-
-
-class PreExecutedTreeFormatter(CriteriaTreeFormatter):
-    @override
-    def header(self) -> str:
-        return "🌲 Pre-Executed Criteria Tree"
-
-    @override
-    def process_test(self, test: "Test | TestResult") -> List[str]:
-        if isinstance(test, TestResult):
-            if test.parameters:
-                params = f" (Parameters: {test.parameters})"
-            else:
-                params = ""
-            return [f"  - 📝 {test.test_name}{params} -> Score: {test.score}"]
-
-        return super().process_test(test)
+    def process_category(self, category: "CategoryNode") -> str:
+        return f"  📁 {category.name.upper()} (max_score: {category.weight})"
diff --git a/autograder/utils/printers/criteria_tree.py b/autograder/utils/printers/criteria_tree.py
index 236e475..686dc3d 100644
--- a/autograder/utils/printers/criteria_tree.py
+++ b/autograder/utils/printers/criteria_tree.py
@@ -2,7 +2,7 @@
 from autograder.utils.formatters.criteria_tree import CriteriaTreeFormatter
 
 if TYPE_CHECKING:
-    from autograder.models.criteria_tree import CriteriaTree, TestCategory, Subject
+    from autograder.models.criteria_tree import CriteriaTree, CategoryNode, SubjectNode
 
 
 class CriteriaTreePrinter:
@@ -19,7 +19,7 @@ def __decrease_depth(self) -> None:
     def __print_with_depth(self, formatted: str) -> None:
         print(f"{'    ' * self.__depth}{formatted}")
 
-    def __print_children(self, parent: "TestCategory | Subject") -> None:
+    def __print_children(self, parent: "CategoryNode | SubjectNode") -> None:
         for subject in parent.subjects:
             self.print_subject(subject)
 
@@ -28,18 +28,20 @@ def __print_children(self, parent: "TestCategory | Subject") -> None:
             for line in lines:
                 self.__print_with_depth(line)
 
-    def print_subject(self, subject: "Subject") -> None:
+    def print_subject(self, subject: "SubjectNode") -> None:
         self.__increase_depth()
         self.__print_with_depth(self.__formatter.process_subject(subject))
         self.__print_children(subject)
         self.__decrease_depth()
 
-    def print_category(self, category: "TestCategory") -> None:
+    def print_category(self, category: "CategoryNode") -> None:
         self.__print_with_depth(self.__formatter.process_category(category))
         self.__print_children(category)
 
     def print_tree(self, tree: "CriteriaTree") -> None:
         self.__print_with_depth(self.__formatter.header())
         self.print_category(tree.base)
-        self.print_category(tree.bonus)
-        self.print_category(tree.penalty)
+        if tree.bonus:
+            self.print_category(tree.bonus)
+        if tree.penalty:
+            self.print_category(tree.penalty)
diff --git a/autograder/utils/processers/criteria_tree.py b/autograder/utils/processers/criteria_tree.py
index 6e7e69f..8ee966f 100644
--- a/autograder/utils/processers/criteria_tree.py
+++ b/autograder/utils/processers/criteria_tree.py
@@ -2,18 +2,18 @@
 from typing import Any, TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from autograder.models.criteria_tree import TestCategory, Subject, Test
+    from autograder.models.criteria_tree import CategoryNode, SubjectNode, TestNode
 
 
 class CriteriaTreeProcesser(ABC):
     @abstractmethod
-    def process_subject(self, subject: "Subject") -> Any:
+    def process_subject(self, subject: "SubjectNode") -> Any:
         pass
 
     @abstractmethod
-    def process_test(self, test: "Test") -> Any:
+    def process_test(self, test: "TestNode") -> Any:
         pass
 
     @abstractmethod
-    def process_category(self, category: "TestCategory") -> Any:
+    def process_category(self, category: "CategoryNode") -> Any:
         pass

From 7a0ec32e86cfee4153e180e7e25daa4ef3214e5d Mon Sep 17 00:00:00 2001
From: jao <61636386+jaoppb@users.noreply.github.com>
Date: Mon, 5 Jan 2026 20:18:02 -0300
Subject: [PATCH 14/49] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 autograder/services/graders/criteria_tree.py | 3 ++-
 autograder/utils/formatters/criteria_tree.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/autograder/services/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py
index d1767ca..3c86fd5 100644
--- a/autograder/services/graders/criteria_tree.py
+++ b/autograder/services/graders/criteria_tree.py
@@ -1,5 +1,6 @@
 import logging
-from typing import Dict, Optional, Sequence, override
+from typing import Dict, Optional, Sequence
+from typing_extensions import override
 from autograder.models.criteria_tree import (
     CategoryNode,
     CriteriaTree,
diff --git a/autograder/utils/formatters/criteria_tree.py b/autograder/utils/formatters/criteria_tree.py
index fe34f89..7d9d67c 100644
--- a/autograder/utils/formatters/criteria_tree.py
+++ b/autograder/utils/formatters/criteria_tree.py
@@ -19,7 +19,7 @@ def process_test(self, test: "TestNode") -> List[str]:
 
     @override
     def process_subject(self, subject: "SubjectNode") -> str:
-        return f"📘{subject.name} (weight: {subject.weight})"
+        return f"📘 {subject.name} (weight: {subject.weight})"
 
     @override
     def process_category(self, category: "CategoryNode") -> str:

From aed70a1ab78866f0c130d964e0fb66eb7b022f4b Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:04:16 -0300
Subject: [PATCH 15/49] fix: tests import

---
 tests/unit/test_pipeline_steps.py | 75 +++++++++++++++----------------
 1 file changed, 36 insertions(+), 39 deletions(-)

diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
index 638c31d..8cd5692 100644
--- a/tests/unit/test_pipeline_steps.py
+++ b/tests/unit/test_pipeline_steps.py
@@ -6,6 +6,7 @@
 2. GradeStep intelligently handles both CriteriaTree and Template inputs
 3. Single vs multi-submission pipeline modes work correctly
 """
+
 import sys
 from pathlib import Path
 
@@ -15,7 +16,7 @@
 
 from autograder.steps.build_tree_step import BuildTreeStep
 from autograder.steps.grade_step import GradeStep
-from autograder.models.dataclass.criteria_config import CriteriaConfig
+from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.dataclass.step_result import StepStatus
 from autograder.models.abstract.template import Template
 from autograder.models.abstract.test_function import TestFunction
@@ -44,7 +45,7 @@ def execute(self, *args, **kwargs):
             passed=True,
             score=100.0,
             max_score=100.0,
-            message="Test passed (mock)"
+            message="Test passed (mock)",
         )
 
 
@@ -56,7 +57,7 @@ def __init__(self):
         self._tests = {
             "expect_output": MockTestFunction("expect_output"),
             "check_file": MockTestFunction("check_file"),
-            "validate_input": MockTestFunction("validate_input")
+            "validate_input": MockTestFunction("validate_input"),
         }
 
     @property
@@ -113,20 +114,20 @@ def create_simple_criteria():
                             "file": "main.py",
                             "parameters": [
                                 {"name": "stdin_input", "value": ["hello"]},
-                                {"name": "expected_output", "value": "hello"}
-                            ]
+                                {"name": "expected_output", "value": "hello"},
+                            ],
                         },
                         {
                             "name": "expect_output",
                             "file": "main.py",
                             "parameters": [
                                 {"name": "stdin_input", "value": ["world"]},
-                                {"name": "expected_output", "value": "world"}
-                            ]
-                        }
-                    ]
+                                {"name": "expected_output", "value": "world"},
+                            ],
+                        },
+                    ],
                 }
-            ]
+            ],
         },
         "bonus": {
             "weight": 10,
@@ -136,26 +137,24 @@ def create_simple_criteria():
                     "file": "main.py",
                     "parameters": [
                         {"name": "stdin_input", "value": ["bonus"]},
-                        {"name": "expected_output", "value": "bonus"}
-                    ]
+                        {"name": "expected_output", "value": "bonus"},
+                    ],
                 }
-            ]
-        }
+            ],
+        },
     }
 
 
 def create_mock_submission():
     """Create mock submission files."""
-    return {
-        "main.py": "# Simple echo program\nprint(input())"
-    }
+    return {"main.py": "# Simple echo program\nprint(input())"}
 
 
 def test_build_tree_step():
     """Test that BuildTreeStep correctly builds a CriteriaTree."""
-    print("\n" + "="*80)
+    print("\n" + "=" * 80)
     print("TEST: BuildTreeStep")
-    print("="*80)
+    print("=" * 80)
 
     # Create criteria and template
     criteria = create_simple_criteria()
@@ -188,9 +187,9 @@ def test_build_tree_step():
 
 def test_grade_from_tree():
     """Test that GradeStep can grade from a CriteriaTree."""
-    print("\n" + "="*80)
+    print("\n" + "=" * 80)
     print("TEST: GradeStep with CriteriaTree (Multi-Submission Mode)")
-    print("="*80)
+    print("=" * 80)
 
     # Build criteria tree first
     criteria = create_simple_criteria()
@@ -203,8 +202,7 @@ def test_grade_from_tree():
 
     # Create and execute grade step
     grade_step = GradeStep(
-        submission_files=submission_files,
-        submission_id="test_submission_1"
+        submission_files=submission_files, submission_id="test_submission_1"
     )
 
     result = grade_step.execute(criteria_tree)
@@ -229,9 +227,9 @@ def test_grade_from_tree():
 
 def test_grade_from_config():
     """Test that GradeStep can grade directly from config (single submission mode)."""
-    print("\n" + "="*80)
+    print("\n" + "=" * 80)
     print("TEST: GradeStep with Template (Single Submission Mode)")
-    print("="*80)
+    print("=" * 80)
 
     # Create criteria and template
     criteria = create_simple_criteria()
@@ -242,7 +240,7 @@ def test_grade_from_config():
     grade_step = GradeStep(
         criteria_json=criteria,
         submission_files=submission_files,
-        submission_id="test_submission_2"
+        submission_id="test_submission_2",
     )
 
     result = grade_step.execute(template)
@@ -267,15 +265,14 @@ def test_grade_from_config():
 
 def test_invalid_input_type():
     """Test that GradeStep rejects invalid input types."""
-    print("\n" + "="*80)
+    print("\n" + "=" * 80)
     print("TEST: GradeStep with Invalid Input Type")
-    print("="*80)
+    print("=" * 80)
 
     submission_files = create_mock_submission()
 
     grade_step = GradeStep(
-        submission_files=submission_files,
-        submission_id="test_submission_3"
+        submission_files=submission_files, submission_id="test_submission_3"
     )
 
     # Try to execute with invalid input (string)
@@ -291,9 +288,9 @@ def test_invalid_input_type():
 
 def run_all_tests():
     """Run all unit tests."""
-    print("\n" + "#"*80)
+    print("\n" + "#" * 80)
     print("# RUNNING PIPELINE STEPS UNIT TESTS")
-    print("#"*80)
+    print("#" * 80)
 
     try:
         # Test 1: Build tree
@@ -308,24 +305,24 @@ def run_all_tests():
         # Test 4: Invalid input handling
         test_invalid_input_type()
 
-        print("\n" + "#"*80)
+        print("\n" + "#" * 80)
         print("# ALL TESTS PASSED! ✓")
-        print("#"*80)
+        print("#" * 80)
 
     except AssertionError as e:
-        print("\n" + "#"*80)
+        print("\n" + "#" * 80)
         print(f"# TEST FAILED: {e}")
-        print("#"*80)
+        print("#" * 80)
         raise
     except Exception as e:
-        print("\n" + "#"*80)
+        print("\n" + "#" * 80)
         print(f"# UNEXPECTED ERROR: {e}")
-        print("#"*80)
+        print("#" * 80)
         import traceback
+
         traceback.print_exc()
         raise
 
 
 if __name__ == "__main__":
     run_all_tests()
-

From 3c0eaf6a7db56e31bcb7602c0965befd4cc441c2 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:04:35 -0300
Subject: [PATCH 16/49] fix: subject and category configs

---
 autograder/models/config/category.py         | 6 +++---
 autograder/models/config/subject.py          | 6 +++---
 autograder/services/parsers/criteria_tree.py | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/autograder/models/config/category.py b/autograder/models/config/category.py
index 50338ac..e553e53 100644
--- a/autograder/models/config/category.py
+++ b/autograder/models/config/category.py
@@ -6,7 +6,7 @@
 
 
 class CategoryConfig(BaseModel):
-    subject_name: str = Field(..., description="Name of the subject")
+    name: str = Field(..., description="Name of the subject")
     weight: float = Field(
         ..., ge=0, le=100, description="Weight of this subject (0-100)"
     )
@@ -15,7 +15,7 @@ class CategoryConfig(BaseModel):
     )
     subjects: Optional[List[SubjectConfig]] = Field(None, description="Nested subjects")
     subjects_weight: Optional[int] = Field(
-        ...,
+        None,
         ge=0,
         le=100,
         description="Weight of the subject when it is a heterogeneous tree",
@@ -35,7 +35,7 @@ def check_subjects_and_tests(self) -> "CategoryConfig":
 
         if has_tests and has_subjects and not has_subject_weight:
             raise ValueError(
-                "Subject needs 'subjects_weight' defined when has tests and subjects"
+                "Category needs 'subjects_weight' defined when has tests and subjects"
             )
 
         return self
diff --git a/autograder/models/config/subject.py b/autograder/models/config/subject.py
index 31afe08..5338323 100644
--- a/autograder/models/config/subject.py
+++ b/autograder/models/config/subject.py
@@ -4,7 +4,7 @@
 
 
 class SubjectConfig(BaseModel):
-    subject_name: str = Field(..., description="Name of the subject")
+    name: str = Field(..., description="Name of the subject")
     weight: float = Field(
         ..., ge=0, le=100, description="Weight of this subject (0-100)"
     )
@@ -15,7 +15,7 @@ class SubjectConfig(BaseModel):
         None, description="Nested subjects"
     )
     subjects_weight: Optional[int] = Field(
-        ...,
+        None,
         ge=0,
         le=100,
         description="Weight of the subject when it is a heterogeneous tree",
@@ -31,7 +31,7 @@ def check_subjects_and_tests(self) -> "SubjectConfig":
         has_subject_weight = self.subjects_weight is not None
 
         if not has_tests and not has_subjects:
-            raise ValueError("Category must have at least 'tests' or 'subjects'.")
+            raise ValueError("Subject must have at least 'tests' or 'subjects'.")
 
         if has_tests and has_subjects and not has_subject_weight:
             raise ValueError(
diff --git a/autograder/services/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py
index eb5f874..2eab502 100644
--- a/autograder/services/parsers/criteria_tree.py
+++ b/autograder/services/parsers/criteria_tree.py
@@ -24,7 +24,7 @@ def __parse_subjects(self, configs: List[SubjectConfig]) -> List[SubjectNode]:
         return subjects
 
     def __parse_subject(self, config: SubjectConfig) -> SubjectNode:
-        subject = SubjectNode(config.subject_name, config.weight)
+        subject = SubjectNode(config.name, config.weight)
 
         if config.subjects_weight:
             subject.subjects_weight = config.subjects_weight

From c1677f3f51cfead4de95cb6997f82e50bb9de15e Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:06:23 -0300
Subject: [PATCH 17/49] fix: test config

---
 autograder/models/config/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autograder/models/config/test.py b/autograder/models/config/test.py
index 9cfebd9..9af903e 100644
--- a/autograder/models/config/test.py
+++ b/autograder/models/config/test.py
@@ -19,7 +19,7 @@ class TestConfig(BaseModel):
         None, description="Target file for the test (if applicable)"
     )
     parameters: Optional[List[ParameterConfig]] = Field(
-        default_factory=list, description="Named parameters for the test function"
+        None, description="Named parameters for the test function"
     )
 
     model_config = {"extra": "forbid"}

From b15aad561596b1b1ad34aa6fafda332abd5548f2 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:09:35 -0300
Subject: [PATCH 18/49] fix: wrong factor calc

---
 autograder/services/graders/criteria_tree.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/autograder/services/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py
index 3c86fd5..72202d9 100644
--- a/autograder/services/graders/criteria_tree.py
+++ b/autograder/services/graders/criteria_tree.py
@@ -47,19 +47,21 @@ def __process_holder(self, holder: CategoryNode | SubjectNode) -> ResultNode:
         if holder.subjects and holder.tests:
             if not holder.subjects_weight:
                 raise ValueError(f"missing 'subjects_weight' for {holder.name}")
-            factor = holder.subjects_weight / 100.0
+            subjects_factor = holder.subjects_weight / 100.0
+            tests_factor = 1 - subjects_factor
         else:
-            factor = 1.0
+            subjects_factor = 1.0
+            tests_factor = 1.0
 
         if holder.subjects:
             subject_results = [
                 self.process_subject(inner_subject) for inner_subject in holder.subjects
             ]
-            self.__balance_nodes(subject_results, factor)
+            self.__balance_nodes(subject_results, subjects_factor)
 
         if holder.tests:
             test_results = [self.process_test(test) for test in holder.tests]
-            self.__balance_nodes(test_results, factor)
+            self.__balance_nodes(test_results, tests_factor)
 
         return result
 

From e422bc536746527c00c5346841f0ad0d4d10a165 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:10:44 -0300
Subject: [PATCH 19/49] fix: add missing list appending

---
 autograder/services/graders/criteria_tree.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/autograder/services/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py
index 72202d9..39a30bf 100644
--- a/autograder/services/graders/criteria_tree.py
+++ b/autograder/services/graders/criteria_tree.py
@@ -58,10 +58,12 @@ def __process_holder(self, holder: CategoryNode | SubjectNode) -> ResultNode:
                 self.process_subject(inner_subject) for inner_subject in holder.subjects
             ]
             self.__balance_nodes(subject_results, subjects_factor)
+            result.children.extend(subject_results)
 
         if holder.tests:
             test_results = [self.process_test(test) for test in holder.tests]
             self.__balance_nodes(test_results, tests_factor)
+            result.children.extend(test_results)
 
         return result
 

From 15b5baf706bc47a32ab8c08f8cbfd81b7954d971 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:13:06 -0300
Subject: [PATCH 20/49] fix: removed rounding at __balance_subject_weights

---
 autograder/services/parsers/criteria_tree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autograder/services/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py
index 2eab502..9491f8e 100644
--- a/autograder/services/parsers/criteria_tree.py
+++ b/autograder/services/parsers/criteria_tree.py
@@ -42,7 +42,7 @@ def __balance_subject_weights(self, subjects: List[SubjectNode]) -> None:
         if total_weight > 0 and total_weight != 100:
             scaling_factor = 100 / total_weight
             for subject in subjects:
-                subject.weight = round(subject.weight * scaling_factor)
+                subject.weight = subject.weight * scaling_factor
 
     def __parse_tests(self, test_configs: List[TestConfig]) -> List[TestNode]:
         return [self.__parse_test(test_item) for test_item in test_configs]

From 15b690f75a464ac5595f1a9d252df3bfa94869e9 Mon Sep 17 00:00:00 2001
From: jaoppb <joaopedroperes06@gmail.com>
Date: Mon, 5 Jan 2026 20:15:38 -0300
Subject: [PATCH 21/49] fix: parsing subjects_weight to category

---
 autograder/services/parsers/criteria_tree.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/autograder/services/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py
index 9491f8e..db359ee 100644
--- a/autograder/services/parsers/criteria_tree.py
+++ b/autograder/services/parsers/criteria_tree.py
@@ -26,8 +26,7 @@ def __parse_subjects(self, configs: List[SubjectConfig]) -> List[SubjectNode]:
     def __parse_subject(self, config: SubjectConfig) -> SubjectNode:
         subject = SubjectNode(config.name, config.weight)
 
-        if config.subjects_weight:
-            subject.subjects_weight = config.subjects_weight
+        subject.subjects_weight = config.subjects_weight
 
         if config.subjects:
             subject.subjects = self.__parse_subjects(config.subjects)
@@ -71,6 +70,8 @@ def __parse_test(self, config: TestConfig) -> TestNode:
     def __parse_category(self, category_name, config: CategoryConfig) -> CategoryNode:
         category = CategoryNode(category_name, config.weight)
 
+        category.subjects_weight = config.subjects_weight
+
         if config.subjects:
             category.add_subjects(self.__parse_subjects(config.subjects))
 

From 0ada0beb15f4f40d40f1ff549b358c75deb48b31 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 11:20:09 -0300
Subject: [PATCH 22/49] refactor: change step order for more coherence

---
 autograder/autograder.py | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/autograder/autograder.py b/autograder/autograder.py
index e399ef4..73c395f 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -18,8 +18,7 @@ def build_pipeline(
                  custom_template = None,
                  feedback_mode = None,
                  submission_files = None,
-                 submission_id = None,
-                 is_multi_submission = False):
+                 submission_id = None):
     """
     Build an autograder pipeline based on configuration.
 
@@ -33,35 +32,24 @@ def build_pipeline(
         feedback_mode: Mode for feedback generation
         submission_files: Student submission files
         submission_id: Optional submission identifier
-        is_multi_submission: Whether grading multiple submissions (requires tree building)
-
     Returns:
         Configured AutograderPipeline
     """
     pipeline = AutograderPipeline()
 
+    # Load template
+    pipeline.add_step(TemplateLoaderStep(template_name, custom_template))
+
+    pipeline.add_step(BuildTreeStep(grading_criteria))
+
     # Pre-flight checks (if configured)
     if setup_config:
         pipeline.add_step(PreFlightStep(setup_config))
 
-    # Load template
-    pipeline.add_step(TemplateLoaderStep(template_name, custom_template))
-
-    # Conditional tree building and grading based on submission count
-    if is_multi_submission:
-        # Multi-submission mode: Build tree once, then grade
-        pipeline.add_step(BuildTreeStep(grading_criteria))
-        pipeline.add_step(GradeStep(
-            submission_files=submission_files,
-            submission_id=submission_id
-        ))
-    else:
-        # Single submission mode: Grade directly from config (one-pass)
-        pipeline.add_step(GradeStep(
-            criteria_json=grading_criteria,
-            submission_files=submission_files,
-            submission_id=submission_id
-        ))
+    pipeline.add_step(GradeStep(
+        submission_files=submission_files,
+        submission_id=submission_id
+    ))
 
     # Feedback generation (if configured)
     if include_feedback:

From 29a922b7d8e643e17b3278ad129546a1e81d2b21 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 11:40:09 -0300
Subject: [PATCH 23/49] feat: add Pydantic models for criteria configuration
 validation

---
 .../models/dataclass/criteria_config.py       | 102 ++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 autograder/models/dataclass/criteria_config.py

diff --git a/autograder/models/dataclass/criteria_config.py b/autograder/models/dataclass/criteria_config.py
new file mode 100644
index 0000000..08d553a
--- /dev/null
+++ b/autograder/models/dataclass/criteria_config.py
@@ -0,0 +1,102 @@
+"""
+Pydantic models for validating criteria configuration JSON structure.
+
+New schema structure:
+- Subjects are arrays with 'subject_name' field
+- Parameters are named objects: [{"name": "param", "value": "val"}, ...]
+- Tests contain parameters directly (no 'calls' array)
+- Root config has optional 'test_library' field
+"""
+from pydantic import BaseModel, Field, field_validator
+from typing import List, Dict, Any, Optional, Union
+
+
+class ParameterConfig(BaseModel):
+    """Named parameter for a test function."""
+    name: str = Field(..., description="Parameter name")
+    value: Any = Field(..., description="Parameter value")
+
+    model_config = {"extra": "forbid"}
+
+
+class TestConfig(BaseModel):
+    """Configuration for a single test execution."""
+    name: str = Field(..., description="Name of the test function in the template")
+    file: Optional[str] = Field(None, description="Target file for the test (if applicable)")
+    parameters: Optional[List[ParameterConfig]] = Field(
+        default_factory=list,
+        description="Named parameters for the test function"
+    )
+
+    model_config = {"extra": "forbid"}
+
+    def get_args_list(self) -> List[Any]:
+        """Convert named parameters to positional arguments list."""
+        if not self.parameters:
+            return []
+        return [param.value for param in self.parameters]
+
+    def get_kwargs_dict(self) -> Dict[str, Any]:
+        """Convert named parameters to keyword arguments dictionary."""
+        if not self.parameters:
+            return {}
+        return {param.name: param.value for param in self.parameters}
+
+
+class SubjectConfig(BaseModel):
+    """Configuration for a subject node (can contain tests or nested subjects)."""
+    subject_name: str = Field(..., description="Name of the subject")
+    weight: float = Field(..., ge=0, le=100, description="Weight of this subject (0-100)")
+    tests: Optional[List[TestConfig]] = Field(None, description="Tests under this subject")
+    subjects: Optional[List['SubjectConfig']] = Field(None, description="Nested subjects")
+
+    model_config = {"extra": "forbid"}
+
+    def model_post_init(self, __context):
+        """Validate that subject has either tests or subjects, but not both or neither."""
+        has_tests = self.tests is not None and len(self.tests) > 0
+        has_subjects = self.subjects is not None and len(self.subjects) > 0
+
+        if has_tests and has_subjects:
+            raise ValueError(f"Subject '{self.subject_name}' cannot have both 'tests' and 'subjects'. Choose one.")
+        if not has_tests and not has_subjects:
+            raise ValueError(f"Subject '{self.subject_name}' must have either 'tests' or 'subjects'.")
+
+
+class CategoryConfig(BaseModel):
+    """Configuration for a category (base, bonus, or penalty)."""
+    weight: float = Field(..., ge=0, le=100, description="Weight of this category (0-100)")
+    subjects: Optional[List[SubjectConfig]] = Field(None, description="Subjects under this category (array)")
+    tests: Optional[List[TestConfig]] = Field(None, description="Tests directly under category")
+
+    model_config = {"extra": "forbid"}
+
+    def model_post_init(self, __context):
+        """Validate that category has either tests or subjects."""
+        has_tests = self.tests is not None and len(self.tests) > 0
+        has_subjects = self.subjects is not None and len(self.subjects) > 0
+
+        if has_tests and has_subjects:
+            raise ValueError("Category cannot have both 'tests' and 'subjects'. Choose one.")
+        if not has_tests and not has_subjects:
+            raise ValueError("Category must have either 'tests' or 'subjects'.")
+
+
+class CriteriaConfig(BaseModel):
+    """Root configuration for grading criteria."""
+    test_library: Optional[str] = Field(None, description="Name of the test library/template to use")
+    base: CategoryConfig = Field(..., description="Base grading criteria (required)")
+    bonus: Optional[CategoryConfig] = Field(None, description="Bonus points criteria")
+    penalty: Optional[CategoryConfig] = Field(None, description="Penalty criteria")
+
+    model_config = {"extra": "forbid"}
+
+    @classmethod
+    def from_dict(cls, data: dict) -> 'CriteriaConfig':
+        """Create and validate criteria config from dictionary."""
+        return cls.model_validate(data)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> 'CriteriaConfig':
+        """Create and validate criteria config from JSON string."""
+        return cls.model_validate_json(json_str)

From a4b0410221b6aa607a05517e3a6b4e859c2269bf Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 11:40:39 -0300
Subject: [PATCH 24/49] refactor: remove submission_id from grading process for
 cleaner implementation

---
 autograder/models/result_tree.py             | 6 ------
 autograder/services/grader_service.py        | 3 +--
 autograder/services/graders/criteria_tree.py | 5 ++---
 autograder/steps/grade_step.py               | 3 +--
 4 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/autograder/models/result_tree.py b/autograder/models/result_tree.py
index 92d3584..aa18b1e 100644
--- a/autograder/models/result_tree.py
+++ b/autograder/models/result_tree.py
@@ -189,7 +189,6 @@ class ResultTree:
     and tree traversal.
     """
     root: ResultNode
-    submission_id: Optional[str] = None
     template_name: Optional[str] = None
     metadata: Dict[str, Any] = field(default_factory=dict)
 
@@ -227,7 +226,6 @@ def get_passed_tests(self) -> List[TestResultNode]:
     def to_dict(self) -> dict:
         """Convert entire result tree to dictionary."""
         return {
-            "submission_id": self.submission_id,
             "template_name": self.template_name,
             "final_score": round(self.root.score, 2),
             "tree": self.root.to_dict(),
@@ -251,8 +249,6 @@ def print_tree(self, show_details: bool = True):
         print("=" * 70)
 
         # Print header info
-        if self.submission_id:
-            print(f"📝 Submission: {self.submission_id}")
         if self.template_name:
             print(f"📋 Template: {self.template_name}")
 
@@ -358,8 +354,6 @@ def print_summary(self):
         print("📊 GRADING SUMMARY")
         print("=" * 70)
 
-        if self.submission_id:
-            print(f"Submission: {self.submission_id}")
 
         print(f"\n🏆 Final Score: {self.root.score:.2f}/100")
 
diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py
index 96ce68e..a440ce8 100644
--- a/autograder/services/grader_service.py
+++ b/autograder/services/grader_service.py
@@ -16,7 +16,6 @@ def grade_from_tree(
         self,
         criteria_tree: CriteriaTree,
         submission_files: Dict[str, Any],
-        submission_id: Optional[str] = None,
     ) -> ResultTree:
         grader = CriteriaTreeGrader(submission_files)
-        return grader.grade(criteria_tree, submission_id)
+        return grader.grade(criteria_tree)
diff --git a/autograder/services/graders/criteria_tree.py b/autograder/services/graders/criteria_tree.py
index 39a30bf..effa77f 100644
--- a/autograder/services/graders/criteria_tree.py
+++ b/autograder/services/graders/criteria_tree.py
@@ -105,8 +105,7 @@ def __find_first_test(self, node: CategoryNode | SubjectNode) -> Optional[TestNo
 
         return None
 
-    def grade(self, tree: CriteriaTree, submission_id: Optional[str]) -> ResultTree:
-        self.logger.info(f"Grading from tree for submission: {submission_id}")
+    def grade(self, tree: CriteriaTree) -> ResultTree:
 
         root = ResultNode(name="root", node_type=NodeType.CATEGORY, weight=100.0)
 
@@ -121,7 +120,7 @@ def grade(self, tree: CriteriaTree, submission_id: Optional[str]) -> ResultTree:
             penalty_result = self.process_category(tree.penalty)
             root.children.append(penalty_result)
 
-        result_tree = ResultTree(root, submission_id)
+        result_tree = ResultTree(root)
 
         # Handle AI executor batch if needed
         # Note: For tree-based grading, the template is embedded in test nodes
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index 4719f24..a7af95a 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -45,8 +45,7 @@ def execute(self, input: CriteriaTree) -> StepResult[GradingResult]:
         try:
             result_tree = self._grader_service.grade_from_tree(
                 criteria_tree=input,
-                submission_files=self._submission_files,
-                submission_id=self._submission_id,
+                submission_files=self._submission_files
             )
 
             # Create grading result

From 20e7954cc0267c7887176ae3359dfb25ec485907 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 11:40:49 -0300
Subject: [PATCH 25/49] feat: implement feedback generation in feedback step

---
 autograder/steps/feedback_step.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
index 5727e84..41f9dc0 100644
--- a/autograder/steps/feedback_step.py
+++ b/autograder/steps/feedback_step.py
@@ -12,4 +12,9 @@ def __init__(self,
 
     def execute(self, input: GradingResult) -> GradingResult:
         """Adds feedback to the grading result using the reporter service."""
-        pass
+        feedback = self._reporter_service.generate_feedback(
+            grading_result=input,
+            config=self._feedback_config
+        )
+        input.feedback = feedback
+        return input

From 73da41890b58a915ac03bf4c7a098e25ad3812ca Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 11:40:54 -0300
Subject: [PATCH 26/49] refactor: update result_tree attribute to be optional
 in grading result

---
 autograder/models/dataclass/grading_result.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/autograder/models/dataclass/grading_result.py b/autograder/models/dataclass/grading_result.py
index 65c06b2..256ceea 100644
--- a/autograder/models/dataclass/grading_result.py
+++ b/autograder/models/dataclass/grading_result.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from typing import Optional
+from autograder.models.result_tree import ResultTree
 
 
 @dataclass
@@ -7,7 +8,7 @@ class GradingResult:
     final_score: float
     status: str
     feedback: Optional[str] = None
-    result_tree: 'ResultTree' = None
+    result_tree: Optional['ResultTree'] = None
     # In case of error
     error: Optional[str] = None
     failed_at_step: Optional[str] = None

From 38a07eca1246116d64fc4ffe304f7b48c9e0a612 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:26:51 -0300
Subject: [PATCH 27/49] feat: implement score setting in ExporterStep with
 error handling

---
 autograder/steps/export_step.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/autograder/steps/export_step.py b/autograder/steps/export_step.py
index f97f778..245cd18 100644
--- a/autograder/steps/export_step.py
+++ b/autograder/steps/export_step.py
@@ -1,9 +1,28 @@
 from autograder.models.abstract.step import Step
-from autograder.models.dataclass.step_result import StepResult
-
+from autograder.models.dataclass.step_result import StepResult, StepStatus
 
 class ExporterStep(Step):
     def __init__(self, remote_driver):
         self._remote_driver = remote_driver # UpstashDriver
     def execute(self, input) -> StepResult:
-        pass
\ No newline at end of file
+        try:
+            # Extract username and score from input
+            username = input.username
+            score = input.score
+
+            # Set the score using UpstashDriver
+            self._remote_driver.set_score(username, score)
+
+            # Return success result
+            return StepResult(
+                data={"username": username, "score": score},
+                status=StepStatus.SUCCESS
+            )
+        except Exception as e:
+            # Return failure result
+            return StepResult(
+                data=None,
+                status=StepStatus.FAIL,
+                error=str(e),
+                failed_at_step="ExporterStep"
+            )

From e3da9472a23de33809c55c577d7b1d575309d5bd Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:47:10 -0300
Subject: [PATCH 28/49] refactor: Clean reporters and create
 reporter_service.py

---
 autograder/services/report/ai_reporter.py     | 211 +-----------------
 autograder/services/report/base_reporter.py   |  49 ----
 .../services/report/default_reporter.py       | 150 +------------
 .../services/report/reporter_factory.py       |  27 ---
 .../services/report/reporter_service.py       |  10 +
 5 files changed, 17 insertions(+), 430 deletions(-)
 delete mode 100644 autograder/services/report/base_reporter.py
 delete mode 100644 autograder/services/report/reporter_factory.py
 create mode 100644 autograder/services/report/reporter_service.py

diff --git a/autograder/services/report/ai_reporter.py b/autograder/services/report/ai_reporter.py
index 93c4564..1d20251 100644
--- a/autograder/services/report/ai_reporter.py
+++ b/autograder/services/report/ai_reporter.py
@@ -1,208 +1,5 @@
-from openai import OpenAI
 
-from autograder.builder.models.template import Template
-from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
-from autograder.services.report.base_reporter import BaseReporter
-from autograder.utils.secrets_fetcher import get_secret
-
-
-# Supondo que estas classes estão em seus respectivos arquivos e são importáveis
-# from .base_reporter import BaseReporter
-# from autograder.core.models.feedback_preferences import FeedbackPreferences
-# from autograder.core.models.result import Result
-
-class AIReporter(BaseReporter):
-    """
-    Gera um feedback sofisticado e humanizado, enviando um prompt detalhado
-    para um modelo de IA.
-    """
-
-    def __init__(self, result: 'Result', feedback: 'FeedbackPreferences', test_library: 'Template', quota: int):
-        super().__init__(result, feedback,test_library)
-        openai_key = get_secret("OPENAI_API_KEY", "AUTOGRADER_OPENAI_KEY", "us-east-1")
-        if not openai_key:
-            raise ValueError("A chave da API da OpenAI é necessária para o AiReporter.")
-        self.client = OpenAI(api_key=openai_key)
-        self.quota = quota
-        self.test_library = test_library
-
-    def generate_feedback(self) -> str:
-        """
-        Constrói um prompt detalhado e chama o modelo de IA para gerar o feedback.
-        """
-        final_prompt = self._build_prompt()
-
-        try:
-            response = self.client.chat.completions.create(
-                model="gpt-4",  # Ou outro modelo de sua escolha
-                messages=[
-                    {"role": "system", "content": self.feedback.ai.feedback_persona},
-                    {"role": "user", "content": final_prompt}
-                ],
-                temperature=0.6)
-            ai_generated_text = response.choices[0].message.content
-
-
-        except Exception as e:
-            ai_generated_text = f"**Ocorreu um erro ao gerar o feedback da IA:** {e}\n\nRetornando para o feedback padrão."
-
-        # --- Formata o relatório final ---
-        report_parts = [
-            f"# {self.feedback.general.report_title}",
-            f"<sup>Este é um feedback gerado por IA e pode conter erros. Você tem {self.quota} créditos restantes.</sup>",
-            f"\nOlá, **{self.result.author}**! Aqui está um feedback detalhado sobre sua atividade.",
-            f"> **Nota Final:** **`{self.result.final_score:.2f} / 100`**",
-            "---",
-            ai_generated_text  # O conteúdo principal vem da IA
-        ]
-
-        if self.feedback.general.add_report_summary:
-            summary = self._build_summary()
-            if summary:
-                report_parts.append(summary)
-
-        report_parts.append("\n\n---\n" + "> Caso queira tirar uma dúvida específica, entre em contato com o Chapter.")
-
-        return "\n".join(filter(None, report_parts))
-
-    def _format_parameters(self, params: dict) -> str:
-        """Helper function to format parameters into a readable code string."""
-        if not params:
-            return ""
-        parts = [f"`{k}`: `{v}`" if isinstance(v, str) else f"`{k}`: `{v}`" for k, v in params.items()]
-        return f" (Parâmetros: {', '.join(parts)})"
-
-    def _build_prompt(self) -> str:
-        """Monta todas as informações necessárias em um único e grande prompt para a IA."""
-
-        prompt_parts = [
-            f"**Persona da IA:**\n{self.feedback.ai.feedback_persona}",
-            f"**Contexto da Atividade:**\n{self.feedback.ai.assignment_context}",
-            f"**Orientações Adicionais:**\n{self.feedback.ai.extra_orientations}",
-            f"**Tom do Feedback:**\n{self.feedback.ai.feedback_tone}",
-            f"**Nível de Ajuda com Soluções:**\n{self.feedback.ai.provide_solutions}",
-            "---",
-            self._get_submission_files_as_text(),
-            "---",
-            self._format_test_results_for_prompt(),
-            "---",
-            self._format_learning_resources_for_prompt(),
-            "---",
-            "**Sua Tarefa:**\nCom base em todo o contexto, código e resultados dos testes fornecidos, escreva um feedback em markdown que seja útil e educativo, seguindo todas as orientações."
-        ]
-        return "\n\n".join(filter(None, prompt_parts))
-
-    def _get_submission_files_as_text(self) -> str:
-        """Lê o conteúdo dos arquivos do aluno especificados nas preferências."""
-        files_to_read = self.feedback.ai.submission_files_to_read
-        if not files_to_read:
-            return "**Código do Aluno:**\nNenhum arquivo foi especificado para leitura."
-
-        file_contents = ["**Código do Aluno:**"]
-        for filename in files_to_read:
-            content = self.result.submission_files.get(filename, f"Arquivo '{filename}' não encontrado.")
-            file_contents.append(f"\n---\n`{filename}`\n---\n```\n{content}\n```")
-
-        return "\n".join(file_contents)
-
-    def _format_test_results_for_prompt(self) -> str:
-        """Formata os resultados dos testes em uma string para a IA analisar."""
-        results_parts = ["**Resultados dos Testes para Análise:**"]
-
-        failed_base = [res for res in self.result.base_results if res.score < 100]
-        passed_bonus = [res for res in self.result.bonus_results if res.score >= 100]
-        failed_penalty = [res for res in self.result.penalty_results if res.score < 100]
-
-        if failed_base:
-            results_parts.append("\n**Testes Obrigatórios que Falharam (Erros Críticos):**")
-            for res in failed_base:
-                results_parts.append(
-                    f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`, Mensagem: {res.report}")
-
-        if passed_bonus and self.feedback.general.show_passed_tests:
-            results_parts.append("\n**Testes Bônus Concluídos com Sucesso (Elogiar):**")
-            for res in passed_bonus:
-                results_parts.append(f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`")
-
-        if failed_penalty:
-            results_parts.append("\n**Penalidades Aplicadas (Más Práticas Detectadas):**")
-            for res in failed_penalty:
-                results_parts.append(
-                    f"- Teste: `{res.test_name}`, Parâmetros: `{res.parameters}`, Mensagem: {res.report}")
-
-        return "\n".join(results_parts)
-
-    def _format_learning_resources_for_prompt(self) -> str:
-        """Formata o conteúdo online para que a IA saiba qual link sugerir para cada erro."""
-        if not self.feedback.general.online_content:
-            return ""
-
-        resource_parts = [
-            "**Recursos de Aprendizagem Disponíveis:**\nSe um teste que falhou estiver listado abaixo, sugira o link correspondente."]
-
-        for resource in self.feedback.general.online_content:
-            tests = ", ".join(f"`{t}`" for t in resource.linked_tests)
-            resource_parts.append(
-                f"- Se os testes {tests} falharem, recomende este link: [{resource.description}]({resource.url})")
-
-        return "\n".join(resource_parts)
-
-    def _build_summary(self) -> str:
-        """Constructs the final summary section of the report using a markdown table."""
-        summary_parts = ["\n---\n\n### 📝 Resumo dos Pontos de Atenção"]
-        failed_base = [res for res in self.result.base_results if res.score < 100]
-        failed_penalty = [res for res in self.result.penalty_results if res.score < 100]
-
-        if not failed_base and not failed_penalty:
-            return ""  # No need for a summary if everything is okay
-
-        summary_parts.append("| Ação | Tópico | Detalhes do Teste |")
-        summary_parts.append("|:---|:---|:---|")
-
-        all_failed = failed_base + failed_penalty
-        for res in all_failed:
-            try:
-                # Get the test function from the library to access its description
-                print("Looking for mother function of test:", res.test_name)
-                print(self.test_library)
-                print("Available tests in library:", self.test_library.template_name)
-                test_func = self.test_library.get_test(res.test_name)
-                print("Testing function:", test_func.name)
-                description = test_func.description
-            except AttributeError:
-                description = "Descrição não disponível."
-
-            params_str = self._format_parameters(res.parameters).replace(" (Parâmetros: ", "").replace(")", "")
-
-            # Determine the action type
-            action = "Revisar"
-            if res in failed_penalty:
-                action = "Corrigir (Penalidade)"
-
-            # Build the detailed cell content
-            details_cell = (
-                f"**Teste:** `{res.test_name}`<br>"
-                f"**O que ele faz:** *{description}*<br>"
-                f"**Parâmetros:** <sub>{params_str or 'N/A'}</sub>"
-            )
-
-            summary_parts.append(f"| {action} | `{res.subject_name}` | {details_cell} |")
-
-        return "\n".join(summary_parts)
-
-    def _get_mock_ai_response(self) -> str:
-        """Uma resposta mockada para fins de teste, já que não estamos fazendo uma chamada de API real."""
-        return (
-            "### Análise Geral\n"
-            "Seu projeto está bem estruturado, mas notei alguns pontos de atenção, principalmente relacionados à acessibilidade das imagens e à responsividade.\n\n"
-            "#### Pontos a Melhorar\n"
-            "> **Acessibilidade de Imagens**\n"
-            "> Percebi que uma de suas imagens está sem o atributo `alt`. Este atributo é fundamental para que leitores de tela possam descrever a imagem para usuários com deficiência visual. Analisando seu `index.html`, a segunda tag `<img>` precisa ser corrigida.\n\n"
-            "> **Responsividade com Media Queries**\n"
-            "> Seu CSS não inclui `@media` queries. Sem elas, seu layout não conseguirá se adaptar a telas menores, como as de celulares. Recomendo fortemente a leitura do material sobre Media Queries para implementar essa funcionalidade."
-        )
-
-    @classmethod
-    def create(cls, result: 'Result', feedback: 'FeedbackPreferences', quota: int, test_library: 'Template'):
-        response = cls(result, feedback, quota, test_library)
-        return response
+class AiReporter:
+    def generate_report(self, result):
+        # Placeholder for AI report generation logic
+        return "AI-generated report based on analysis data."
\ No newline at end of file
diff --git a/autograder/services/report/base_reporter.py b/autograder/services/report/base_reporter.py
deleted file mode 100644
index a013cb0..0000000
--- a/autograder/services/report/base_reporter.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import List, Dict
-
-# Assuming these classes are in their respective, importable files
-# from autograder.core.models.feedback_preferences import FeedbackPreferences
-# from autograder.core.models.result import Result
-# from autograder.builder.tree_builder import TestResult
-
-class BaseReporter(ABC):
-    """Abstract base class for reporting test results."""
-    def __init__(self, result: 'Result', feedback: 'FeedbackPreferences',template):
-        self.result = result
-        self.feedback = feedback
-        self.template = template
-        # A map to quickly find learning resources for a given test name
-        self._content_map = self._build_content_map()
-
-    def _build_content_map(self) -> Dict[str, 'FeedbackPreferences.LearningResource']:
-        """
-        Creates a dictionary for fast lookups of learning resources by test name.
-        This is a shared utility for any reporter.
-        """
-        content_map = {}
-        for resource in self.feedback.general.online_content:
-            for test_name in resource.linked_tests:
-                content_map[test_name] = resource
-        return content_map
-
-    def _group_results_by_subject(self, results: List['TestResult']) -> Dict[str, List['TestResult']]:
-        """
-        Groups a flat list of TestResult objects into a dictionary keyed by subject name.
-        This is a shared utility for any reporter.
-        """
-        grouped = {}
-        for result in results:
-            if result.subject_name not in grouped:
-                grouped[result.subject_name] = []
-            grouped[result.subject_name].append(result)
-        return grouped
-
-    @abstractmethod
-    def generate_feedback(self):
-        """Generate feedback based on the test results."""
-        pass
-
-    @classmethod
-    def create(cls, result: 'Result', feedback: 'FeedbackPreferences',template):
-        response = cls(result, feedback,template)
-        return response
\ No newline at end of file
diff --git a/autograder/services/report/default_reporter.py b/autograder/services/report/default_reporter.py
index 476f12e..860892e 100644
--- a/autograder/services/report/default_reporter.py
+++ b/autograder/services/report/default_reporter.py
@@ -1,149 +1,5 @@
-from autograder.builder.models.template import Template
-from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
-from autograder.services.report.base_reporter import BaseReporter
 
 
-class DefaultReporter(BaseReporter):
-    """
-    Generates a structured and visually appealing markdown feedback report
-    designed to be a clear and helpful learning tool for students.
-    """
-
-    def __init__(self, result: 'Result', feedback: 'FeedbackPreferences', test_library: 'Template'):
-        super().__init__(result, feedback, test_library)
-        self.test_library = test_library
-
-    def generate_feedback(self) -> str:
-        """
-        Builds the entire markdown report by assembling its various sections.
-        """
-        report_parts = [
-            self._build_header(),
-            self._build_category_section("bonus"),
-            self._build_category_section("base"),
-            self._build_category_section("penalty")
-        ]
-
-        if self.feedback.general.add_report_summary:
-            summary = self._build_summary()
-            if summary:  # Only add summary if it's not empty
-                report_parts.append(summary)
-
-        report_parts.append(self._build_footer())
-        return "\n".join(filter(None, report_parts))
-
-    def _format_parameters(self, params: dict) -> str:
-        """Helper function to format parameters into a readable code string."""
-        if not params:
-            return ""
-        parts = [f"`{k}`: `{v}`" if isinstance(v, str) else f"`{k}`: `{v}`" for k, v in params.items()]
-        return f" (Parâmetros: {', '.join(parts)})"
-
-    def _build_header(self) -> str:
-        """Constructs the top section of the report."""
-        header_parts = [f"# {self.feedback.general.report_title}"]
-        if self.feedback.general.show_score:
-            header_parts.append(f"> **Nota Final:** **`{self.result.final_score:.2f} / 100`**")
-
-        header_parts.append(
-            f"\nOlá, **{self.result.author}**! 👋\n\nAqui está o feedback detalhado sobre sua atividade. Use este guia para entender seus acertos e os pontos que podem ser melhorados.")
-        return "\n".join(header_parts)
-
-    def _build_category_section(self, category_name: str) -> str:
-        """Builds a report section for a specific category with enhanced formatting and text."""
-        category_results = getattr(self.result, f"{category_name}_results", [])
-        header = self.feedback.default.category_headers.get(category_name, category_name.capitalize())
-        section_parts = [f"\n---\n\n## {header}"]
-
-        results_to_show = []
-        intro_text = ""
-        is_bonus = False
-
-        if category_name == "bonus":
-            is_bonus = True
-            if self.feedback.general.show_passed_tests:
-                results_to_show = [res for res in category_results if res.score >= 60]
-                intro_text = "Parabéns! Você completou os seguintes itens bônus, demonstrando um ótimo conhecimento:" if results_to_show else "Nenhum item bônus foi completado desta vez. Continue se desafiando!"
-        else:  # base and penalty
-            results_to_show = [res for res in category_results if res.score < 60]
-            if category_name == "base":
-                intro_text = "Encontramos alguns pontos nos requisitos essenciais que precisam de sua atenção:" if results_to_show else "Excelente! Todos os requisitos essenciais foram atendidos com sucesso."
-            elif category_name == "penalty":
-                intro_text = "Foram detectadas algumas práticas que resultaram em penalidades. Veja os detalhes abaixo para entender como corrigi-las:" if results_to_show else "Ótimo trabalho! Nenhuma má prática foi detectada no seu projeto."
-
-        section_parts.append(intro_text)
-
-        if not results_to_show:
-            return "\n".join(section_parts)
-
-        grouped_results = self._group_results_by_subject(results_to_show)
-
-        for subject, results in grouped_results.items():
-            section_parts.append(f"\n#### Tópico: {subject.replace('_', ' ').capitalize()}")
-            for res in results:
-                params_str = self._format_parameters(res.parameters)
-
-                if is_bonus:
-                    status_text = "✅ **Passou**"
-                    report_prefix = "Parabéns!"
-                else:
-                    status_text = "❌ **Falhou**"
-                    report_prefix = "Atenção:" if category_name == "base" else "Cuidado!"
-
-                feedback_item = [
-                    f"> {status_text} no teste `{res.test_name}`{params_str}",
-                    f"> - **Detalhes:** {report_prefix} {res.report}\n"
-                ]
-
-                if not is_bonus:
-                    linked_content = self._content_map.get(res.test_name)
-                    if linked_content:
-                        feedback_item.append(
-                            f"> - 📚 **Recurso Sugerido:** [{linked_content.description}]({linked_content.url})\n")
-
-                section_parts.append("\n".join(feedback_item))
-
-        return "\n".join(section_parts)
-
-    def _build_summary(self) -> str:
-        """Constructs the final summary section of the report using a markdown table."""
-        summary_parts = ["\n---\n\n### 📝 Resumo dos Pontos de Atenção"]
-        failed_base = [res for res in self.result.base_results if res.score < 100]
-        failed_penalty = [res for res in self.result.penalty_results if res.score < 100]
-
-        if not failed_base and not failed_penalty:
-            return ""  # No need for a summary if everything is okay
-
-        summary_parts.append("| Ação | Tópico | Detalhes do Teste |")
-        summary_parts.append("|:---|:---|:---|")
-
-        all_failed = failed_base + failed_penalty
-        for res in all_failed:
-            try:
-                # Get the test function from the library to access its description
-                test_func = self.test_library.get_test(res.test_name)
-                description = test_func.description
-            except AttributeError:
-                description = "Descrição não disponível."
-
-            params_str = self._format_parameters(res.parameters).replace(" (Parâmetros: ", "").replace(")", "")
-
-            # Determine the action type
-            action = "Revisar"
-            if res in failed_penalty:
-                action = "Corrigir (Penalidade)"
-
-            # Build the detailed cell content
-            details_cell = (
-                f"**Teste:** `{res.test_name}`<br>"
-                f"**O que foi verificado:** *{description}*<br>"
-                f"**Parâmetros:** <sub>{params_str or 'N/A'}</sub>"
-            )
-
-            summary_parts.append(f"| {action} | `{res.subject_name}` | {details_cell} |")
-
-        return "\n".join(summary_parts)
-
-    def _build_footer(self) -> str:
-        """Constructs the footer of the report."""
-        return "\n---\n" + "> Continue praticando e melhorando seu código. Cada desafio é uma oportunidade de aprender! 🚀"
+class DefaultReporter:
+    def generate_report(self, results):
+        pass
\ No newline at end of file
diff --git a/autograder/services/report/reporter_factory.py b/autograder/services/report/reporter_factory.py
deleted file mode 100644
index 752ac18..0000000
--- a/autograder/services/report/reporter_factory.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from autograder.models.dataclass.feedback_preferences import FeedbackPreferences
-from autograder.models.dataclass.result import Result
-from autograder.services.report.ai_reporter import AIReporter
-from autograder.services.report.default_reporter import DefaultReporter
-class ReporterFactory:
-
-
-    @staticmethod
-    def create_reporter_for(mode: str):
-        """Creates a reporter instance based on the specified mode."""
-        if mode == "ai":
-            return ReporterFactory.create_ai_reporter()
-        else:
-            return ReporterFactory.create_default_reporter()
-
-
-    @classmethod
-    def create_ai_reporter(cls, result: Result, feedback: FeedbackPreferences,template, quota):
-        """Creates an AIReporter instance with the students results"""
-        return AIReporter.create(result,feedback,template,quota)
-
-    @classmethod
-    def create_default_reporter(cls, result: Result,feedback: FeedbackPreferences,template):
-        """Creates a DefaultReporter instance with the students results"""
-        return DefaultReporter.create(result,feedback,template)
-
-
diff --git a/autograder/services/report/reporter_service.py b/autograder/services/report/reporter_service.py
new file mode 100644
index 0000000..3493267
--- /dev/null
+++ b/autograder/services/report/reporter_service.py
@@ -0,0 +1,10 @@
+from autograder.services.report.default_reporter import DefaultReporter
+from autograder.services.report.ai_reporter import AiReporter
+
+class ReporterService:
+    def __init__(self, feedback_mode: str):
+        if feedback_mode == "ai":
+            self._reporter = DefaultReporter()
+        else:
+            self._reporter = AiReporter()
+

From 920790f89c38cb17520a56dc9cf07cbbd34095db Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:47:16 -0300
Subject: [PATCH 29/49] feat: add Submission and SubmissionFile dataclasses for
 handling submissions

---
 autograder/models/dataclass/submission.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 autograder/models/dataclass/submission.py

diff --git a/autograder/models/dataclass/submission.py b/autograder/models/dataclass/submission.py
new file mode 100644
index 0000000..44d8cc3
--- /dev/null
+++ b/autograder/models/dataclass/submission.py
@@ -0,0 +1,14 @@
+from typing import List
+
+from autograder.models import dataclass
+
+@dataclass
+class SubmissionFile:
+    filename: str
+    content: str
+@dataclass
+class Submission:
+    username: str
+    user_id: int
+    assignment_id: int
+    submission_files: List[SubmissionFile]

From 73ff5f034b3be0a9db993d9320521ddb14d7e409 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:47:22 -0300
Subject: [PATCH 30/49] fix: update run method to specify input_data type as
 Submission

---
 autograder/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autograder/pipeline.py b/autograder/pipeline.py
index 18be298..114f45b 100644
--- a/autograder/pipeline.py
+++ b/autograder/pipeline.py
@@ -10,7 +10,7 @@ def __init__(self):
     def add_step(self, step: Step) -> None:
         self._steps.append(step)
 
-    def run(self, input_data):
+    def run(self, input_data:'Submission'):
         result = StepResult(data=input_data, status=StepStatus.SUCCESS, original_input=input_data) #Initialize result object with input data
 
         for step in self._steps:

From e27157d438916568aa0bf37fac2d0bb6dfd7aa42 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:47:27 -0300
Subject: [PATCH 31/49] refactor: remove submission_id parameter from GradeStep
 initializer

---
 autograder/steps/grade_step.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index a7af95a..2d6f3cf 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -18,7 +18,6 @@ class GradeStep(Step):
     def __init__(
         self,
         submission_files: Dict[str, Any],
-        submission_id: Optional[str],
     ):
         """
         Initialize the grade step.
@@ -26,10 +25,8 @@ def __init__(
         Args:
             criteria_json: Raw criteria configuration (only needed for single submission mode)
             submission_files: Student submission files
-            submission_id: Optional identifier for the submission
         """
         self._submission_files = submission_files
-        self._submission_id = submission_id
         self._grader_service = GraderService()
 
     def execute(self, input: CriteriaTree) -> StepResult[GradingResult]:

From 161a523aaf672524f6b1c32c08896cc75465456e Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:47:38 -0300
Subject: [PATCH 32/49] refactor: add comments to clarify pipeline step
 functionality in autograder

---
 autograder/autograder.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/autograder/autograder.py b/autograder/autograder.py
index 73c395f..bc2bd25 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -38,9 +38,9 @@ def build_pipeline(
     pipeline = AutograderPipeline()
 
     # Load template
-    pipeline.add_step(TemplateLoaderStep(template_name, custom_template))
+    pipeline.add_step(TemplateLoaderStep(template_name, custom_template)) # Passes the template to the next step
 
-    pipeline.add_step(BuildTreeStep(grading_criteria))
+    pipeline.add_step(BuildTreeStep(grading_criteria)) # Uses template to match selected tests in criteria and builds tree
 
     # Pre-flight checks (if configured)
     if setup_config:
@@ -48,16 +48,15 @@ def build_pipeline(
 
     pipeline.add_step(GradeStep(
         submission_files=submission_files,
-        submission_id=submission_id
-    ))
+    )) # Generates GradingResult with final score and result tree
 
     # Feedback generation (if configured)
     if include_feedback:
-        reporter_service = ReporterFactory.create_reporter_for(feedback_mode)
-        pipeline.add_step(FeedbackStep(reporter_service, feedback_config))
+        reporter_service = ReporterFactory.create_reporter_for(feedback_mode,)
+        pipeline.add_step(FeedbackStep(reporter_service, feedback_config)) # Uses GradingResult to generate feedback and appends it to GradingResult
 
     # Export results
-    pipeline.add_step(ExporterStep(UpstashDriver))
+    pipeline.add_step(ExporterStep(UpstashDriver)) # Exports final results and feedback
 
     return pipeline
 

From c8196d10329db4728dac67e904fb5ff903266618 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:56:36 -0300
Subject: [PATCH 33/49] refactor: streamline template loading by removing
 legacy methods and simplifying imports

---
 .../services/template_library_service.py      | 116 +++---------------
 1 file changed, 14 insertions(+), 102 deletions(-)

diff --git a/autograder/services/template_library_service.py b/autograder/services/template_library_service.py
index d7ac00a..8bb191f 100644
--- a/autograder/services/template_library_service.py
+++ b/autograder/services/template_library_service.py
@@ -1,109 +1,21 @@
-import importlib.util
-import inspect
 from autograder.models.abstract.template import Template
-
+from autograder.template_library.web_dev import WebDevTemplate
+from autograder.template_library.api_testing import ApiTestingTemplate
+from autograder.template_library.input_output import InputOutputTemplate
+from autograder.template_library.essay_grader import EssayGraderTemplate
 
 class TemplateLibraryService:
-    @staticmethod
-    def get_template(template_name: str, custom_template_content: str = None, clean=False):
-        if template_name == "custom":
-            if not custom_template_content:
-                raise ValueError("Custom template content must be provided for 'custom' template type.")
-            return TemplateLibraryService._load_custom_template_from_content(custom_template_content)
-
-        if template_name == "webdev":
-            from autograder.template_library.web_dev import WebDevTemplate
-            return WebDevTemplate(clean)
-        if template_name == "api":
-            from autograder.template_library.api_testing import ApiTestingTemplate
-            return ApiTestingTemplate(clean)
-        if template_name == "essay":
-            from autograder.template_library.essay_grader import EssayGraderTemplate
-            return EssayGraderTemplate(clean)
-        if template_name == "io":
-            from autograder.template_library.input_output import InputOutputTemplate
-            return InputOutputTemplate(clean)
-        else:
-            raise ValueError(f"Template '{template_name}' not found.")
-
-    @staticmethod
-    def _load_custom_template_from_content(template_content: str):
-        """Load a custom template directly from string content without file placement."""
-        spec = importlib.util.spec_from_loader("custom_template", loader=None)
-        custom_module = importlib.util.module_from_spec(spec)
-
-        # Execute the template code directly in the module namespace
-        exec(template_content, custom_module.__dict__)
-
-        # Find and return the Template subclass
-        for name, obj in inspect.getmembers(custom_module):
-            if inspect.isclass(obj) and issubclass(obj, Template) and obj is not Template:
-                return obj()
+    def __init__(self):
+        pass
 
-        raise ImportError("No class inheriting from 'Template' found in the custom template content.")
-
-    @staticmethod
-    def _load_custom_template(file_path: str):
-        """Legacy method for file-based custom templates."""
-        spec = importlib.util.spec_from_file_location("custom_template", file_path)
-        custom_module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(custom_module)
-
-        for name, obj in inspect.getmembers(custom_module):
-            if inspect.isclass(obj) and issubclass(obj, Template) and obj is not Template:
-                return obj()
-
-        raise ImportError(f"No class inheriting from 'Template' found in {file_path}")
-    
-
-    @staticmethod
-    def get_template_info(template_name: str)-> dict:
-        """Gets all the details of a template.
-        param template_name: The name of the template to retrieve.
-        return: A dictionary with all the template details.
-        example:
-        {
-            "name": "I/O",
-            "description": "Template for testing input/output functions.",
-            "tests": [
-                {
-                    "name": "test_function_1",
-                    "description": "Tests function 1 with various inputs.",
-                    "parameters": [
-                        {
-                            "name": "input1",
-                            "description": "Description of input1",
-                            "type": "string"
-                        }
-                    ]
-                }, 
-                ...
+    def start_template(self, template_name: str) -> Template:
+        """Initialize and return the template class based on the template name.
+           If template requires sandboxing, it creates a sandboxed instance.
         """
-        #1. Retrieve an instance of the template from the library
-        template = TemplateLibrary.get_template(template_name, clean=True)
-        if not template:
-            raise ValueError(f"Template '{template_name}' not found.")
-        
-        #2. Prepare the main dictionary with basic template info
-        template_data = {
-            "template_name": template.template_name,
-            "template_description": template.template_description,
-            "tests": []
-        }
+        pass
+
+    def get_template_info(self, template_name: str) -> dict:
+        """Return metadata about the template."""
+        pass
 
-        for test in template.get_tests().values():
-            test_data = {
-                "name": test.name,
-                "description": test.description,
-                "required_file": test.required_file,
-                "parameters": []
-            }
-            for param in test.parameter_description:
-                test_data["parameters"].append({
-                    "name": param.name,
-                    "description": param.description,
-                    "type": param.type
-                })
-            template_data["tests"].append(test_data)
 
-        return template_data

From eeee05cec06afaec2e452310b78f883765738c82 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:58:28 -0300
Subject: [PATCH 34/49] refactor: remove unused main execution blocks and
 legacy code from multiple files

---
 autograder/autograder.py                      |   7 +-
 .../models/dataclass/feedback_preferences.py  | 111 -----------------
 autograder/steps/feedback_step.py             |   9 +-
 autograder/template_library/api_testing.py    | 107 -----------------
 autograder/template_library/input_output.py   | 112 ------------------
 autograder/utils/executors/ai_executor.py     |  12 --
 6 files changed, 6 insertions(+), 352 deletions(-)

diff --git a/autograder/autograder.py b/autograder/autograder.py
index bc2bd25..0c7de6b 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -1,4 +1,4 @@
-from autograder.services.report.reporter_factory import ReporterFactory
+from autograder.services.report.reporter_service import ReporterService
 from autograder.services.upstash_driver import UpstashDriver
 from autograder.pipeline import AutograderPipeline
 from autograder.steps.export_step import ExporterStep
@@ -17,8 +17,7 @@ def build_pipeline(
                  setup_config = None,
                  custom_template = None,
                  feedback_mode = None,
-                 submission_files = None,
-                 submission_id = None):
+                 submission_files = None):
     """
     Build an autograder pipeline based on configuration.
 
@@ -52,7 +51,7 @@ def build_pipeline(
 
     # Feedback generation (if configured)
     if include_feedback:
-        reporter_service = ReporterFactory.create_reporter_for(feedback_mode,)
+        reporter_service = ReporterService(feedback_mode=feedback_mode)
         pipeline.add_step(FeedbackStep(reporter_service, feedback_config)) # Uses GradingResult to generate feedback and appends it to GradingResult
 
     # Export results
diff --git a/autograder/models/dataclass/feedback_preferences.py b/autograder/models/dataclass/feedback_preferences.py
index 0f8fcaa..938815c 100644
--- a/autograder/models/dataclass/feedback_preferences.py
+++ b/autograder/models/dataclass/feedback_preferences.py
@@ -59,115 +59,4 @@ class FeedbackPreferences:
     ai: AiReporterPreferences = field(default_factory=AiReporterPreferences)
     default: DefaultReporterPreferences = field(default_factory=DefaultReporterPreferences)
 
-    @classmethod
-    def from_dict(cls) -> 'FeedbackPreferences':
-        """
-        Creates a FeedbackPreferences object from a dictionary, with defaults.
-        """
-        request = request_context.get_request()
-        config_dict = request.assignment_config.feedback
 
-        # --- Parse General Preferences, including the new online_content ---
-        general_prefs_data = config_dict.get('general', {}).copy()
-        online_content_data = general_prefs_data.pop('online_content', [])
-
-        # Create LearningResource objects
-        online_resources = [LearningResource(**res) for res in online_content_data]
-        general_prefs_data['online_content'] = online_resources
-        
-        general = GeneralPreferences(**general_prefs_data)
-
-        # --- Parse AI and Default Preferences ---
-        ai_prefs_data = config_dict.get('ai', {})
-        default_prefs_data = config_dict.get('default', {})
-
-        ai = AiReporterPreferences(**ai_prefs_data)
-        default = DefaultReporterPreferences(**default_prefs_data)
-
-        return cls(general=general, ai=ai, default=default)
-
-
-if __name__ == '__main__':
-    feedback_config = {
-        "general": {
-            "report_title": "Relatório Final - Desafio Web",
-            "add_report_summary": True,
-            "online_content": [
-                {
-                    "url": "https://developer.mozilla.org/pt-BR/docs/Web/HTML/Element/img",
-                    "description": "Guia completo sobre a tag <img>.",
-                    "linked_tests": ["check_all_images_have_alt"]
-                }
-            ]
-        },
-        "ai": {
-            "assignment_context": "Este é um desafio focado em HTML semântico e CSS responsivo.",
-            "feedback_persona": "Professor Sênior"
-        },
-        "default": {
-            "category_headers": {
-                "base": "✔️ Requisitos Obrigatórios",
-                "penalty": "🚨 Pontos de Atenção"
-            }
-        }
-    }
-
-    # ===============================================================
-    # 2. CREATE THE PREFERENCES OBJECT FROM THE DICTIONARY
-    # ===============================================================
-    # The .from_dict() method will parse the dictionary and fill in any missing
-    # values with the defaults defined in the class.
-    try:
-        # Note: For standalone testing, you'd need to mock request_context
-        # For now, creating directly for demonstration
-        preferences = FeedbackPreferences(
-            general=GeneralPreferences(
-                report_title="Relatório Final - Desafio Web",
-                add_report_summary=True,
-                online_content=[
-                    LearningResource(
-                        url="https://developer.mozilla.org/pt-BR/docs/Web/HTML/Element/img",
-                        description="Guia completo sobre a tag <img>.",
-                        linked_tests=["check_all_images_have_alt"]
-                    )
-                ]
-            ),
-            ai=AiReporterPreferences(
-                assignment_context="Este é um desafio focado em HTML semântico e CSS responsivo.",
-                feedback_persona="Professor Sênior"
-            ),
-            default=DefaultReporterPreferences(
-                category_headers={
-                    "base": "✔️ Requisitos Obrigatórios",
-                    "penalty": "🚨 Pontos de Atenção"
-                }
-            )
-        )
-
-        # ===============================================================
-        # 3. VERIFY THE PARSED VALUES
-        # ===============================================================
-        print("--- FeedbackPreferences object created successfully ---\n")
-
-        # --- Verify General Preferences ---
-        print("✅ General Preferences:")
-        print(f"  - Report Title: '{preferences.general.report_title}' (Loaded from config)")
-        print(f"  - Show Score: {preferences.general.show_score} (Using default value)")
-        print(f"  - Online Content Items: {len(preferences.general.online_content)} (Loaded from config)")
-        print(f"    - First item URL: {preferences.general.online_content[0].url}")
-        print(f"    - Linked to tests: {preferences.general.online_content[0].linked_tests}")
-
-        # --- Verify AI Preferences ---
-        print("\n🤖 AI Reporter Preferences:")
-        print(f"  - Feedback Persona: '{preferences.ai.feedback_persona}' (Loaded from config)")
-        print(f"  - Feedback Tone: '{preferences.ai.feedback_tone}' (Using default value)")
-        print(f"  - Assignment Context: '{preferences.ai.assignment_context}' (Loaded from config)")
-
-        # --- Verify Default Reporter Preferences ---
-        print("\n📝 Default Reporter Preferences:")
-        print(f"  - Base Header: '{preferences.default.category_headers['base']}' (Loaded from config)")
-        # 'bonus' was not in the config, so it should use the default from the class
-        print(f"  - Bonus Header: '{preferences.default.category_headers['bonus']}' (Using default value)")
-
-    except Exception as e:
-        print(f"An error occurred: {e}")
\ No newline at end of file
diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
index 41f9dc0..185d39d 100644
--- a/autograder/steps/feedback_step.py
+++ b/autograder/steps/feedback_step.py
@@ -1,20 +1,17 @@
-from autograder.services.report.base_reporter import BaseReporter
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.abstract.step import Step
+from autograder.services.report.reporter_service import ReporterService
 
 
 class FeedbackStep(Step):
     def __init__(self,
-                 reporter_service: BaseReporter,
+                 reporter_service: ReporterService,
                  feedback_config: dict):
         self._reporter_service = reporter_service
         self._feedback_config = feedback_config
 
     def execute(self, input: GradingResult) -> GradingResult:
         """Adds feedback to the grading result using the reporter service."""
-        feedback = self._reporter_service.generate_feedback(
-            grading_result=input,
-            config=self._feedback_config
-        )
+        feedback = self._reporter_service.generate_feedback()
         input.feedback = feedback
         return input
diff --git a/autograder/template_library/api_testing.py b/autograder/template_library/api_testing.py
index 76074ee..3bf43f9 100644
--- a/autograder/template_library/api_testing.py
+++ b/autograder/template_library/api_testing.py
@@ -231,110 +231,3 @@ def get_test(self, name: str) -> TestFunction:
         return test_function
 
 
-if __name__ == "__main__":
-    import sys
-    import os
-
-    # This allows the script to find the other autograder modules
-    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
-    if project_root not in sys.path:
-        sys.path.insert(0, project_root)
-
-    from connectors.models.autograder_request import AutograderRequest
-    from connectors.models.assignment_config import AssignmentConfig
-    from autograder.context import request_context
-
-
-    def create_mock_submission():
-        """Creates the in-memory files for a simple student Express.js API."""
-        package_json = {
-            "name": "student-api", "version": "1.0.0", "main": "server.js",
-            "scripts": {"start": "node server.js"},
-            "dependencies": {"express": "^4.17.1"}
-        }
-        server_js = """
-           const express = require('express');
-           const app = express();
-           const port = 8000;
-
-           app.get('/health', (req, res) => res.status(200).send({ status: 'ok' }));
-           app.get('/api/user', (req, res) => res.json({ userId: 1, name: 'John Doe' }));
-
-           // The second argument '0.0.0.0' is the key.
-           app.listen(port, '0.0.0.0', () => {
-              console.log(`Server listening on port ${port}`);
-            });
-           """
-        return {
-            "package.json": json.dumps(package_json, indent=2),
-            "server.js": server_js
-        }
-
-
-    def create_mock_configs():
-        """Creates the mock setup and criteria configurations."""
-        setup_config = {
-            "runtime_image": "node:18-alpine",
-            "container_port": 8000,
-            "start_command": "node server.js",
-            "commands": {"install_dependencies": "npm install"}
-        }
-        criteria_config = {
-            "base": {
-                "subjects": {
-                    "api_functionality": {
-                        "weight": 100,
-                        "tests": [
-                            {"name": "health_check", "calls": [["/health"]]},
-                            {"name": "check_response_json", "calls": [["/api/user", "userId", 1]]}
-                        ]
-                    }
-                }
-            }
-        }
-        return setup_config, criteria_config
-
-
-    # --- Main Simulation Logic ---
-    logging.info("--- 1. Setting up mock environment ---")
-    submission_files = create_mock_submission()
-    setup_config, criteria_config = create_mock_configs()
-
-    assignment_config = AssignmentConfig(criteria=criteria_config, feedback=None, setup=setup_config)
-    autograder_request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="MockStudent"
-    )
-    request_context.set_request(autograder_request)
-
-    template = None
-    try:
-        logging.info("\n--- 2. Initializing API Testing Template (this will start the sandbox) ---")
-        template = ApiTestingTemplate()
-
-        logging.info("\n--- 3. Running Tests ---")
-
-        health_check_test = template.get_test("health_check")
-        health_result = health_check_test.execute("/health")
-
-        logging.info("\n[Health Check Result]")
-        logging.info(f"  Score: {health_result.score}")
-        logging.info(f"  Report: {health_result.report}")
-
-        json_check_test = template.get_test("check_response_json")
-        json_result = json_check_test.execute("/api/user", "userId", 1)
-
-        logging.info("\n[JSON Check Result]")
-        logging.info(f"  Score: {json_result.score}")
-        logging.info(f"  Report: {json_result.report}")
-
-    except Exception as e:
-        logging.error(f"\nAN ERROR OCCURRED: {e}")
-        import traceback
-        traceback.print_exc()
-
-    finally:
-        if template:
-            logging.info("\n--- 4. Cleaning up sandbox environment ---")
-            template.stop()
diff --git a/autograder/template_library/input_output.py b/autograder/template_library/input_output.py
index 6d8783d..1373fda 100644
--- a/autograder/template_library/input_output.py
+++ b/autograder/template_library/input_output.py
@@ -156,115 +156,3 @@ def get_test(self, name: str) -> TestFunction:
         return test_function
 
 
-if __name__ == "__main__":
-    import sys
-    import os
-
-    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
-    if project_root not in sys.path:
-        sys.path.insert(0, project_root)
-
-    from connectors.models.autograder_request import AutograderRequest
-    from connectors.models.assignment_config import AssignmentConfig
-    from autograder.context import request_context
-
-
-    def create_mock_submission():
-        """Creates an in-memory file for a simple Python calculator."""
-        calculator_py = """
-import sys
-
-def main():
-    try:
-        # Using sys.stdin.readline() is more robust for non-interactive scripts
-        operation = sys.stdin.readline().strip()
-        num1 = float(sys.stdin.readline().strip())
-        num2 = float(sys.stdin.readline().strip())
-
-        if operation == "sum":
-            print(num1 + num2)
-        elif operation == "subtract":
-            print(num1 - num2)
-        else:
-            print("Unknown operation")
-    except (ValueError, IndexError):
-        print("Invalid input")
-
-if __name__ == "__main__":
-    main()
-"""
-        return {"calculator.py": calculator_py}
-
-
-    def create_mock_configs():
-        """Creates the mock setup and criteria configurations."""
-        setup_config = {
-            "runtime_image": "python:3.11-slim",
-            "start_command": "python calculator.py"
-        }
-        criteria_config = {
-            "base": {
-                "subjects": {
-                    "calculation_tests": {
-                        "weight": 100,
-                        "tests": [
-                            {"name": "expect_output", "calls": [[["sum", 2, 2], "4.0"]]},
-                            {"name": "expect_output", "calls": [[["subtract", 10, 5], "5.0"]]}
-                        ]
-                    }
-                }
-            }
-        }
-        return setup_config, criteria_config
-
-
-    # --- Main Simulation Logic ---
-    print("--- 1. Setting up mock environment ---")
-    submission_files = create_mock_submission()
-    setup_config, criteria_config = create_mock_configs()
-
-    assignment_config = AssignmentConfig(criteria=criteria_config, feedback=None, setup=setup_config)
-    autograder_request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="MockStudent"
-    )
-    request_context.set_request(autograder_request)
-
-    template = None
-    try:
-        print("\n--- 2. Initializing Input/Output Template ---")
-        template = InputOutputTemplate()
-
-        print("\n--- 3. Running Tests ---")
-
-        # Test 1: Sum (Will pass)
-        test_func = template.get_test("expect_output")
-        sum_result = test_func.execute(["sum", 2, 2], "4.0")
-        print("\n[Sum Test Result]")
-        print(f"  Score: {sum_result.score}")
-        print(f"  Report: {sum_result.report}")
-
-        # Test 2: Sum (Will fail)
-        test_func = template.get_test("expect_output")
-        sum_result = test_func.execute(["sum", 2, 2], "3.0")
-        print("\n[Sum Test Result]")
-        print(f"  Score: {sum_result.score}")
-        print(f"  Report: {sum_result.report}")
-
-        # Test 2: Subtract
-        subtract_result = test_func.execute(["subtract", 10, 5], "5.0")
-        print("\n[Subtract Test Result]")
-        print(f"  Score: {subtract_result.score}")
-        print(f"  Report: {subtract_result.report}")
-
-    except Exception as e:
-        print(f"\nAN ERROR OCCURRED: {e}")
-        import traceback
-
-        traceback.print_exc()
-
-    finally:
-        if template:
-            print("\n--- 4. Cleaning up sandbox environment ---")
-            template.stop()
diff --git a/autograder/utils/executors/ai_executor.py b/autograder/utils/executors/ai_executor.py
index 343024f..fe6627a 100644
--- a/autograder/utils/executors/ai_executor.py
+++ b/autograder/utils/executors/ai_executor.py
@@ -182,15 +182,3 @@ def stop(self):
 
 
 
-if __name__ == "__main__":
-    text = {"text.txt":"""Artificial intelligence (AI) is no longer a concept confined to science fiction; it is a transformative force actively reshaping industries and redefining the nature of work. Its integration into the modern workforce presents a profound duality: on one hand, it offers unprecedented opportunities for productivity and innovation, while on the other, it poses significant challenges related to job displacement and economic inequality. Navigating this transition successfully requires a proactive and nuanced approach from policymakers, businesses, and individuals alike.
-The primary benefit of AI in the workplace is its capacity to augment human potential and drive efficiency. AI-powered systems can analyze vast datasets in seconds, automating routine cognitive and manual tasks, which frees human workers to focus on more complex, creative, and strategic endeavors. For instance, in medicine, AI algorithms assist radiologists in detecting tumors with greater accuracy, while in finance, they identify fraudulent transactions far more effectively than any human team. This collaboration between human and machine not only boosts output but also creates new roles centered around AI development, ethics, and system maintenance—jobs that did not exist a decade ago.
-However, this technological advancement casts a significant shadow of disruption. The same automation that drives efficiency also leads to job displacement, particularly for roles characterized by repetitive tasks. Assembly line workers, data entry clerks, and even some paralegal roles face a high risk of obsolescence. This creates a widening skills gap, where demand for high-level technical skills soars while demand for traditional skills plummets. Without robust mechanisms for reskilling and upskilling the existing workforce, this gap threatens to exacerbate socio-economic inequality, creating a divide between those who can command AI and those who are displaced by it. There are many gramatical errors in this sentence, for testing purposes.
-The most critical challenge, therefore, is not to halt technological progress but to manage its societal impact. A multi-pronged strategy is essential. Governments and educational institutions must collaborate to reform curricula, emphasizing critical thinking, digital literacy, and lifelong learning. Furthermore, corporations have a responsibility to invest in their employees through continuous training programs. Finally, strengthening social safety nets, perhaps through concepts like Universal Basic Income (UBI) or enhanced unemployment benefits, may be necessary to support individuals as they navigate this volatile transition period.
-In conclusion, AI is a double-edged sword. Its potential to enhance productivity and create new avenues for growth is undeniable, but so are the risks of displacement and inequality. The future of work will not be a battle of humans versus machines, but rather a story of adaptation. By investing in education, promoting equitable policies, and fostering a culture of continuous learning, we can harness the power of AI to build a more prosperous and inclusive workforce for all."""}
-
-    ai_executor.add_test("Content: Identify Specific Examples","In a scale of 0 to 100, how well does the text provide specific examples to support its main points? Consider the relevance and clarity of the examples given.")
-    ai_executor.add_test("Clarity: Evaluate Overall Clarity","On a scale from 0 to 100, how clear and understandable is the text? Consider the organization of ideas, sentence structure, and use of language.")
-    ai_executor.add_test("Grammar: Check for Grammatical Errors","On a scale from 0 to 100, how free is the text from grammatical errors? Consider issues such as subject-verb agreement, punctuation, and sentence fragments.")
-    ai_executor.add_test("Engagement: Assess Reader Engagement","On a scale from 0 to 100, how engaging is the text? Consider the use of anecdotes, rhetorical questions, and vivid language that captures the reader's interest.")
-    results = ai_executor.stop()

From d0b30f8f75d21e1aee18b608b107e30963f721ce Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 19:59:15 -0300
Subject: [PATCH 35/49] refactor: delete current tests

---
 tests/__init__.py                             |   0
 ...der_API_Collection.postman_collection.json | 329 -----------
 tests/data/README.md                          | 510 ------------------
 tests/data/api_request_schema.json            | 266 ---------
 tests/data/api_testing/criteria.json          |  37 --
 tests/data/api_testing/feedback.json          |  14 -
 tests/data/api_testing/package.json           |  12 -
 tests/data/api_testing/server.js              |  56 --
 tests/data/api_testing/setup.json             |   8 -
 tests/data/curl_examples.sh                   | 246 ---------
 tests/data/custom_template/criteria.json      |   0
 tests/data/custom_template/custom_template.py | 116 ----
 tests/data/custom_template/feedback.json      |  12 -
 tests/data/custom_template/main.py            |   0
 tests/data/essay/criteria.json                |   0
 tests/data/essay/essay.txt                    |   0
 tests/data/essay/feedback.json                |   0
 tests/data/input_output/calculator.py         |  46 --
 tests/data/input_output/criteria.json         |   0
 tests/data/input_output/feedback.json         |  14 -
 tests/data/input_output/requirements.txt      |   0
 tests/data/input_output/setup.json            |   0
 tests/data/web_dev/criteria.json              |  35 --
 tests/data/web_dev/feedback.json              |  15 -
 tests/data/web_dev/index.html                 |  39 --
 tests/data/web_dev/script.js                  |  33 --
 tests/data/web_dev/style.css                  |   0
 tests/playroom.py                             |  46 --
 tests/playroom/README.md                      | 286 ----------
 tests/playroom/__init__.py                    |  10 -
 tests/playroom/api_playroom.py                | 250 ---------
 tests/playroom/essay_playroom.py              | 287 ----------
 tests/playroom/io_playroom.py                 | 226 --------
 tests/playroom/run_all_playrooms.py           |   0
 tests/playroom/webdev_playroom.py             | 241 ---------
 tests/test_pipeline_modes.py                  | 252 ---------
 tests/unit/__init__.py                        |   0
 tests/unit/test_pipeline_steps.py             | 328 -----------
 38 files changed, 3714 deletions(-)
 delete mode 100644 tests/__init__.py
 delete mode 100644 tests/data/Autograder_API_Collection.postman_collection.json
 delete mode 100644 tests/data/README.md
 delete mode 100644 tests/data/api_request_schema.json
 delete mode 100644 tests/data/api_testing/criteria.json
 delete mode 100644 tests/data/api_testing/feedback.json
 delete mode 100644 tests/data/api_testing/package.json
 delete mode 100644 tests/data/api_testing/server.js
 delete mode 100644 tests/data/api_testing/setup.json
 delete mode 100644 tests/data/curl_examples.sh
 delete mode 100644 tests/data/custom_template/criteria.json
 delete mode 100644 tests/data/custom_template/custom_template.py
 delete mode 100644 tests/data/custom_template/feedback.json
 delete mode 100644 tests/data/custom_template/main.py
 delete mode 100644 tests/data/essay/criteria.json
 delete mode 100644 tests/data/essay/essay.txt
 delete mode 100644 tests/data/essay/feedback.json
 delete mode 100644 tests/data/input_output/calculator.py
 delete mode 100644 tests/data/input_output/criteria.json
 delete mode 100644 tests/data/input_output/feedback.json
 delete mode 100644 tests/data/input_output/requirements.txt
 delete mode 100644 tests/data/input_output/setup.json
 delete mode 100644 tests/data/web_dev/criteria.json
 delete mode 100644 tests/data/web_dev/feedback.json
 delete mode 100644 tests/data/web_dev/index.html
 delete mode 100644 tests/data/web_dev/script.js
 delete mode 100644 tests/data/web_dev/style.css
 delete mode 100644 tests/playroom.py
 delete mode 100644 tests/playroom/README.md
 delete mode 100644 tests/playroom/__init__.py
 delete mode 100644 tests/playroom/api_playroom.py
 delete mode 100644 tests/playroom/essay_playroom.py
 delete mode 100644 tests/playroom/io_playroom.py
 delete mode 100644 tests/playroom/run_all_playrooms.py
 delete mode 100644 tests/playroom/webdev_playroom.py
 delete mode 100644 tests/test_pipeline_modes.py
 delete mode 100644 tests/unit/__init__.py
 delete mode 100644 tests/unit/test_pipeline_steps.py

diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/Autograder_API_Collection.postman_collection.json b/tests/data/Autograder_API_Collection.postman_collection.json
deleted file mode 100644
index 8336d7f..0000000
--- a/tests/data/Autograder_API_Collection.postman_collection.json
+++ /dev/null
@@ -1,329 +0,0 @@
-{
-  "info": {
-    "name": "Autograder API Collection",
-    "description": "Complete API collection for testing the Autograder API with various templates and scenarios",
-    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
-  },
-  "variable": [
-    {
-      "key": "base_url",
-      "value": "http://localhost:8001",
-      "type": "string"
-    }
-  ],
-  "item": [
-    {
-      "name": "Grade Submission - Web Dev",
-      "request": {
-        "method": "POST",
-        "header": [],
-        "body": {
-          "mode": "formdata",
-          "formdata": [
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/web_dev/index.html"
-            },
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/web_dev/style.css"
-            },
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/web_dev/script.js"
-            },
-            {
-              "key": "criteria_json",
-              "type": "file",
-              "src": "tests/data/web_dev/criteria.json"
-            },
-            {
-              "key": "feedback_json",
-              "type": "file",
-              "src": "tests/data/web_dev/feedback.json"
-            },
-            {
-              "key": "template_preset",
-              "value": "web dev",
-              "type": "text"
-            },
-            {
-              "key": "student_name",
-              "value": "John Doe",
-              "type": "text"
-            },
-            {
-              "key": "student_credentials",
-              "value": "test-token-123",
-              "type": "text"
-            },
-            {
-              "key": "include_feedback",
-              "value": "true",
-              "type": "text"
-            },
-            {
-              "key": "feedback_type",
-              "value": "default",
-              "type": "text"
-            }
-          ]
-        },
-        "url": {
-          "raw": "{{base_url}}/grade_submission/",
-          "host": ["{{base_url}}"],
-          "path": ["grade_submission", ""]
-        },
-        "description": "Test web development template with HTML, CSS, and JavaScript files"
-      },
-      "response": []
-    },
-    {
-      "name": "Grade Submission - API Testing",
-      "request": {
-        "method": "POST",
-        "header": [],
-        "body": {
-          "mode": "formdata",
-          "formdata": [
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/api_testing/server.js"
-            },
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/api_testing/package.json"
-            },
-            {
-              "key": "criteria_json",
-              "type": "file",
-              "src": "tests/data/api_testing/criteria.json"
-            },
-            {
-              "key": "feedback_json",
-              "type": "file",
-              "src": "tests/data/api_testing/feedback.json"
-            },
-            {
-              "key": "setup_json",
-              "type": "file",
-              "src": "tests/data/api_testing/setup.json"
-            },
-            {
-              "key": "template_preset",
-              "value": "api",
-              "type": "text"
-            },
-            {
-              "key": "student_name",
-              "value": "Jane Smith",
-              "type": "text"
-            },
-            {
-              "key": "student_credentials",
-              "value": "test-token-456",
-              "type": "text"
-            },
-            {
-              "key": "include_feedback",
-              "value": "true",
-              "type": "text"
-            },
-            {
-              "key": "feedback_type",
-              "value": "default",
-              "type": "text"
-            }
-          ]
-        },
-        "url": {
-          "raw": "{{base_url}}/grade_submission/",
-          "host": ["{{base_url}}"],
-          "path": ["grade_submission", ""]
-        },
-        "description": "Test API testing template with Node.js Express server"
-      },
-      "response": []
-    },
-    {
-      "name": "Grade Submission - Input/Output",
-      "request": {
-        "method": "POST",
-        "header": [],
-        "body": {
-          "mode": "formdata",
-          "formdata": [
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/input_output/calculator.py"
-            },
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/input_output/requirements.txt"
-            },
-            {
-              "key": "criteria_json",
-              "type": "file",
-              "src": "tests/data/input_output/criteria.json"
-            },
-            {
-              "key": "feedback_json",
-              "type": "file",
-              "src": "tests/data/input_output/feedback.json"
-            },
-            {
-              "key": "setup_json",
-              "type": "file",
-              "src": "tests/data/input_output/setup.json"
-            },
-            {
-              "key": "template_preset",
-              "value": "io",
-              "type": "text"
-            },
-            {
-              "key": "student_name",
-              "value": "Bob Johnson",
-              "type": "text"
-            },
-            {
-              "key": "student_credentials",
-              "value": "test-token-789",
-              "type": "text"
-            },
-            {
-              "key": "include_feedback",
-              "value": "true",
-              "type": "text"
-            },
-            {
-              "key": "feedback_type",
-              "value": "default",
-              "type": "text"
-            }
-          ]
-        },
-        "url": {
-          "raw": "{{base_url}}/grade_submission/",
-          "host": ["{{base_url}}"],
-          "path": ["grade_submission", ""]
-        },
-        "description": "Test input/output template with Python calculator"
-      },
-      "response": []
-    },
-    {
-      "name": "Grade Submission - Custom Template",
-      "request": {
-        "method": "POST",
-        "header": [],
-        "body": {
-          "mode": "formdata",
-          "formdata": [
-            {
-              "key": "submission_files",
-              "type": "file",
-              "src": "tests/data/custom_template/main.py"
-            },
-            {
-              "key": "criteria_json",
-              "type": "file",
-              "src": "tests/data/custom_template/criteria.json"
-            },
-            {
-              "key": "feedback_json",
-              "type": "file",
-              "src": "tests/data/custom_template/feedback.json"
-            },
-            {
-              "key": "custom_template",
-              "type": "file",
-              "src": "tests/data/custom_template/custom_template.py"
-            },
-            {
-              "key": "template_preset",
-              "value": "custom",
-              "type": "text"
-            },
-            {
-              "key": "student_name",
-              "value": "Alice Williams",
-              "type": "text"
-            },
-            {
-              "key": "student_credentials",
-              "value": "test-token-101",
-              "type": "text"
-            },
-            {
-              "key": "include_feedback",
-              "value": "true",
-              "type": "text"
-            },
-            {
-              "key": "feedback_type",
-              "value": "default",
-              "type": "text"
-            }
-          ]
-        },
-        "url": {
-          "raw": "{{base_url}}/grade_submission/",
-          "host": ["{{base_url}}"],
-          "path": ["grade_submission", ""]
-        },
-        "description": "Test custom template with custom grading logic"
-      },
-      "response": []
-    },
-    {
-      "name": "Get Template Info - Web Dev",
-      "request": {
-        "method": "GET",
-        "header": [],
-        "url": {
-          "raw": "{{base_url}}/templates/webdev",
-          "host": ["{{base_url}}"],
-          "path": ["template", "web_dev"]
-        },
-        "description": "Get information about the web development template"
-      },
-      "response": []
-    },
-    {
-      "name": "Get Template Info - API",
-      "request": {
-        "method": "GET",
-        "header": [],
-        "url": {
-          "raw": "{{base_url}}/templates/api",
-          "host": ["{{base_url}}"],
-          "path": ["template", "api"]
-        },
-        "description": "Get information about the API testing template"
-      },
-      "response": []
-    },
-    {
-      "name": "Get Template Info - Input/Output",
-      "request": {
-        "method": "GET",
-        "header": [],
-        "url": {
-          "raw": "{{base_url}}/templates/io",
-          "host": ["{{base_url}}"],
-          "path": ["template", "io"]
-        },
-        "description": "Get information about the input/output template"
-      },
-      "response": []
-    }
-  ]
-}
diff --git a/tests/data/README.md b/tests/data/README.md
deleted file mode 100644
index df71884..0000000
--- a/tests/data/README.md
+++ /dev/null
@@ -1,510 +0,0 @@
-# Autograder API Test Suite
-
-This directory contains comprehensive test data and a testing script for the Autograder API. The test suite covers all supported template types and provides realistic submission scenarios.
-
-## 📁 Directory Structure
-
-```
-tests/data/
-├── web_dev/              # Web Development template test data
-│   ├── index.html        # Student HTML submission
-│   ├── style.css         # Student CSS submission
-│   ├── script.js         # Student JavaScript submission
-│   ├── criteria.json     # Grading criteria configuration
-│   └── feedback.json     # Feedback configuration
-│
-├── api_testing/          # API Testing template test data
-│   ├── server.js         # Student Node.js API server
-│   ├── package.json      # NPM dependencies
-│   ├── criteria.json     # API testing criteria
-│   ├── feedback.json     # Feedback configuration
-│   └── setup.json        # Container setup (runtime, commands)
-│
-├── input_output/         # Input/Output template test data
-│   ├── calculator.py     # Student Python program
-│   ├── requirements.txt  # Python dependencies
-│   ├── criteria.json     # I/O testing criteria
-│   ├── feedback.json     # Feedback configuration
-│   └── setup.json        # Container setup
-│
-├── essay/                # Essay template test data
-│   ├── essay.txt         # Student essay text
-│   ├── criteria.json     # Essay grading criteria
-│   └── feedback.json     # Feedback configuration
-│
-└── custom_template/      # Custom Template test data
-    ├── main.py           # Student Python submission
-    ├── custom_template.py # Custom grading template
-    ├── criteria.json     # Custom criteria
-    └── feedback.json     # Feedback configuration
-```
-
-## 🚀 Quick Start
-
-### Prerequisites
-
-```bash
-# Install required Python package
-pip install requests
-```
-
-### Running Tests
-
-**Interactive Menu Mode:**
-```bash
-python test_api_requests.py
-```
-
-**Direct Test Execution:**
-```bash
-# Test specific template
-python test_api_requests.py --test web
-python test_api_requests.py --test api
-python test_api_requests.py --test io
-python test_api_requests.py --test essay
-python test_api_requests.py --test custom
-
-# Run all tests
-python test_api_requests.py --test all
-```
-
-**Custom API URL:**
-```bash
-python test_api_requests.py --url http://api.example.com:8000
-```
-
-## 📋 API Endpoints
-
-### 1. Grade Submission (POST)
-
-**Endpoint:** `/grade_submission/`
-
-**Request Format:**
-- Method: `POST`
-- Content-Type: `multipart/form-data`
-
-**Form Fields:**
-
-| Field | Type | Required | Description |
-|-------|------|----------|-------------|
-| `submission_files` | File[] | ✅ | Student's source code files |
-| `template_preset` | String | ✅ | Template type: "web dev", "api", "io", "essay", "custom" |
-| `student_name` | String | ✅ | Student's name |
-| `student_credentials` | String | ✅ | GitHub token or credentials |
-| `include_feedback` | Boolean | ✅ | Whether to include detailed feedback |
-| `criteria_json` | File | ✅ | JSON file with grading criteria |
-| `feedback_type` | String | ⚠️ | "default" or "ai" (default: "default") |
-| `feedback_json` | File | ⚠️ | JSON file with feedback configuration |
-| `setup_json` | File | ⚠️ | JSON file for container setup (required for api/io) |
-| `custom_template` | File | ⚠️ | Python file with custom template (required for "custom") |
-| `openai_key` | String | ⚠️ | OpenAI API key (for AI feedback) |
-| `redis_url` | String | ⚠️ | Redis URL (for AI feedback caching) |
-| `redis_token` | String | ⚠️ | Redis token |
-
-**Response Format:**
-```json
-{
-  "server_status": "Server connection happened successfully",
-  "autograding_status": "completed",
-  "final_score": 85.5,
-  "feedback": "...",
-  "test_report": [
-    {
-      "name": "has_tag",
-      "score": 100,
-      "report": "Found 5 of 5 required div tags",
-      "parameters": {"tag": "div", "required_count": 5}
-    }
-  ]
-}
-```
-
-### 2. Get Template Info (GET)
-
-**Endpoint:** `/template/{template_name}`
-
-**Example:**
-```bash
-GET /templates/webdev
-GET /templates/api
-GET /templates/io
-GET /templates/essay
-```
-
-**Response:** Returns template metadata including available tests and their parameters.
-
-## 📦 Payload Examples
-
-### 1. Web Development Template
-
-**Template:** `web dev`  
-**Files:** HTML, CSS, JavaScript  
-**No Setup Required:** Tests run directly on static files
-
-**Payload Structure:**
-```python
-files = [
-    ('submission_files', ('index.html', html_content, 'text/plain')),
-    ('submission_files', ('style.css', css_content, 'text/plain')),
-    ('submission_files', ('script.js', js_content, 'text/plain')),
-    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
-    ('feedback_json', ('feedback.json', feedback_content, 'application/json'))
-]
-
-data = {
-    'template_preset': 'web dev',
-    'student_name': 'John Doe',
-    'student_credentials': 'token-123',
-    'include_feedback': 'true',
-    'feedback_type': 'default'
-}
-```
-
-**Criteria Example:**
-```json
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "html_structure": {
-        "weight": 40,
-        "tests": [
-          {
-            "name": "has_tag",
-            "file": "index.html",
-            "calls": [
-              ["div", 5],
-              ["h1", 2],
-              ["p", 3]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
-```
-
-### 2. API Testing Template
-
-**Template:** `api`  
-**Files:** server.js, package.json  
-**Requires:** setup.json with Docker configuration
-
-**Payload Structure:**
-```python
-files = [
-    ('submission_files', ('server.js', server_content, 'text/plain')),
-    ('submission_files', ('package.json', package_content, 'text/plain')),
-    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
-    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
-    ('setup_json', ('setup.json', setup_content, 'application/json'))
-]
-
-data = {
-    'template_preset': 'api',
-    'student_name': 'Jane Smith',
-    'student_credentials': 'token-456',
-    'include_feedback': 'true',
-    'feedback_type': 'default'
-}
-```
-
-**Setup Example:**
-```json
-{
-  "runtime_image": "node:18-alpine",
-  "container_port": 8000,
-  "start_command": "node server.js",
-  "commands": {
-    "install_dependencies": "npm install"
-  }
-}
-```
-
-**Criteria Example:**
-```json
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "api_endpoints": {
-        "weight": 100,
-        "tests": [
-          {
-            "name": "health_check",
-            "calls": [["/health"]]
-          },
-          {
-            "name": "check_response_json",
-            "calls": [
-              ["/api/user/1", "id", 1],
-              ["/api/user/1", "name", "John Doe"]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
-```
-
-### 3. Input/Output Template
-
-**Template:** `io`  
-**Files:** Python script, requirements.txt  
-**Requires:** setup.json with Docker configuration
-
-**Payload Structure:**
-```python
-files = [
-    ('submission_files', ('calculator.py', program_content, 'text/plain')),
-    ('submission_files', ('requirements.txt', requirements, 'text/plain')),
-    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
-    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
-    ('setup_json', ('setup.json', setup_content, 'application/json'))
-]
-
-data = {
-    'template_preset': 'io',
-    'student_name': 'Bob Johnson',
-    'student_credentials': 'token-789',
-    'include_feedback': 'true',
-    'feedback_type': 'default'
-}
-```
-
-**Criteria Example:**
-```json
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "basic_operations": {
-        "weight": 100,
-        "tests": [
-          {
-            "name": "expect_output",
-            "calls": [
-              [["add", "5", "3"], "8"],
-              [["subtract", "10", "4"], "6"]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
-```
-
-### 4. Essay Template
-
-**Template:** `essay`  
-**Files:** Plain text essay  
-**No Setup Required:** Graded based on text content
-
-**Payload Structure:**
-```python
-files = [
-    ('submission_files', ('essay.txt', essay_content, 'text/plain')),
-    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
-    ('feedback_json', ('feedback.json', feedback_content, 'application/json'))
-]
-
-data = {
-    'template_preset': 'essay',
-    'student_name': 'Chris Lee',
-    'student_credentials': 'token-202',
-    'include_feedback': 'true',
-    'feedback_type': 'default'
-}
-```
-
-**Criteria Example:**
-```json
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "content_quality": {
-        "weight": 70,
-        "tests": [
-          {
-            "name": "check_keyword",
-            "file": "essay.txt",
-            "calls": [
-              ["introduction", 1],
-              ["conclusion", 1]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
-```
-
-### 5. Custom Template
-
-**Template:** `custom`  
-**Files:** Student submission + custom_template.py  
-**Requires:** Custom Python template file
-
-**Payload Structure:**
-```python
-files = [
-    ('submission_files', ('main.py', student_code, 'text/plain')),
-    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
-    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
-    ('custom_template', ('template.py', template_code, 'text/plain'))
-]
-
-data = {
-    'template_preset': 'custom',
-    'student_name': 'Alice Williams',
-    'student_credentials': 'token-101',
-    'include_feedback': 'true',
-    'feedback_type': 'default'
-}
-```
-
-## 🧪 Test Scenarios
-
-### Scenario 1: Web Development Portfolio
-Tests HTML structure, CSS styling, and JavaScript functionality for a student portfolio website.
-
-**Expected Results:**
-- ✅ HTML semantic tags detected
-- ✅ CSS classes and properties validated
-- ✅ JavaScript event listeners found
-- ✅ No console errors
-
-### Scenario 2: REST API Server
-Tests a Node.js Express API with multiple endpoints and JSON responses.
-
-**Expected Results:**
-- ✅ Health check endpoint responds
-- ✅ User data endpoints return correct JSON
-- ✅ POST requests create resources
-
-### Scenario 3: Python Calculator
-Tests a command-line calculator program with various mathematical operations.
-
-**Expected Results:**
-- ✅ Addition operation works correctly
-- ✅ Subtraction operation works correctly
-- ✅ Edge cases handled (division by zero, etc.)
-
-### Scenario 4: Essay Evaluation
-Evaluates a student's essay based on content quality, keyword presence, and structure.
-
-**Expected Results:**
-- ✅ Introduction and conclusion paragraphs present
-- ✅ Required keywords found
-- ✅ No spelling or grammar errors
-
-### Scenario 5: Custom Template
-Tests a custom grading template that checks for file existence and function definitions.
-
-**Expected Results:**
-- ✅ Required files present
-- ✅ Required functions defined
-
-## 🔧 Troubleshooting
-
-### Connection Errors
-```
-❌ ERROR: Could not connect to API at http://localhost:8001
-```
-**Solution:** Ensure the API server is running:
-```bash
-cd autograder/connectors/adapters/api
-python api_entrypoint.py
-```
-
-### Missing Test Data
-```
-FileNotFoundError: Test directory not found
-```
-**Solution:** Ensure you're running the script from the project root:
-```bash
-cd /path/to/autograder
-python test_api_requests.py
-```
-
-### Timeout Errors
-```
-❌ ERROR: Request timed out
-```
-**Solution:** 
-- Increase timeout in the script (default: 120 seconds)
-- Check if Docker containers are running properly
-- Verify network connectivity
-
-## 📊 Understanding Results
-
-### Score Interpretation
-- **100**: Perfect score - all tests passed
-- **0-99**: Partial score - some tests passed
-- **0**: Failed - no tests passed
-
-### Test Report Format
-Each test in the report includes:
-- `name`: Test function name
-- `score`: Score out of 100
-- `report`: Human-readable description
-- `parameters`: Test parameters used
-
-### Feedback Types
-- **default**: Standard feedback based on test results
-- **ai**: AI-generated feedback (requires OpenAI API key)
-
-## 🚀 AWS Lambda Deployment
-
-For deploying to AWS Lambda, the payload structure remains the same. However:
-
-1. **Base64 Encoding**: File contents must be base64 encoded
-2. **API Gateway**: Use multipart/form-data or JSON with base64 strings
-3. **Timeout**: Set Lambda timeout to at least 5 minutes for complex tests
-4. **Memory**: Allocate at least 2GB RAM for Docker operations
-
-**Example Lambda Payload:**
-```json
-{
-  "template_preset": "web dev",
-  "student_name": "John Doe",
-  "student_credentials": "token-123",
-  "include_feedback": true,
-  "submission_files": [
-    {
-      "filename": "index.html",
-      "content": "base64_encoded_content_here"
-    }
-  ],
-  "criteria": { /* criteria JSON */ },
-  "feedback": { /* feedback JSON */ }
-}
-```
-
-## 📝 Notes
-
-- All test data is realistic and follows best practices
-- Tests are designed to pass with provided submissions
-- Modify criteria.json to test different scenarios
-- Use setup.json for templates requiring runtime environments
-- Custom templates must inherit from the Template base class
-
-## 🤝 Contributing
-
-To add new test scenarios:
-
-1. Create a new directory under `tests/data/`
-2. Add submission files and configuration JSONs
-3. Update `test_api_requests.py` with a new test method
-4. Add the test to the interactive menu
-
-## 📚 Additional Resources
-
-- [API Documentation](../docs/api_reference.md)
-- [Template Guide](../docs/creating_assignments.md)
-- [Configuration Rules](../docs/CONFIGURATION_RULES.md)
diff --git a/tests/data/api_request_schema.json b/tests/data/api_request_schema.json
deleted file mode 100644
index d5ef75b..0000000
--- a/tests/data/api_request_schema.json
+++ /dev/null
@@ -1,266 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "title": "Autograder API Request Schema",
-  "description": "JSON Schema for validating Autograder API requests",
-  "type": "object",
-  "properties": {
-    "template_preset": {
-      "type": "string",
-      "enum": ["web dev", "api", "io", "custom", "essay"],
-      "description": "The grading template to use"
-    },
-    "student_name": {
-      "type": "string",
-      "minLength": 1,
-      "description": "Name or identifier of the student"
-    },
-    "student_credentials": {
-      "type": "string",
-      "description": "Authentication token or credentials"
-    },
-    "include_feedback": {
-      "type": "boolean",
-      "description": "Whether to include detailed feedback in the response"
-    },
-    "feedback_type": {
-      "type": "string",
-      "enum": ["default", "ai"],
-      "default": "default",
-      "description": "Type of feedback generation"
-    },
-    "openai_key": {
-      "type": "string",
-      "description": "OpenAI API key (required if feedback_type is 'ai')"
-    },
-    "redis_url": {
-      "type": "string",
-      "format": "uri",
-      "description": "Redis connection URL for caching"
-    },
-    "redis_token": {
-      "type": "string",
-      "description": "Redis authentication token"
-    }
-  },
-  "required": [
-    "template_preset",
-    "student_name",
-    "student_credentials",
-    "include_feedback"
-  ],
-  "allOf": [
-    {
-      "if": {
-        "properties": {
-          "feedback_type": {"const": "ai"}
-        }
-      },
-      "then": {
-        "required": ["openai_key"]
-      }
-    }
-  ],
-  "definitions": {
-    "criteria_schema": {
-      "type": "object",
-      "properties": {
-        "base": {
-          "type": "object",
-          "properties": {
-            "weight": {
-              "type": "number",
-              "minimum": 0,
-              "maximum": 100
-            },
-            "subjects": {
-              "type": "object",
-              "patternProperties": {
-                "^[a-zA-Z_][a-zA-Z0-9_]*$": {
-                  "type": "object",
-                  "properties": {
-                    "weight": {
-                      "type": "number",
-                      "minimum": 0,
-                      "maximum": 100
-                    },
-                    "tests": {
-                      "type": "array",
-                      "items": {
-                        "type": "object",
-                        "properties": {
-                          "name": {
-                            "type": "string",
-                            "description": "Test function name"
-                          },
-                          "file": {
-                            "type": "string",
-                            "description": "Target file for the test"
-                          },
-                          "calls": {
-                            "type": "array",
-                            "items": {
-                              "type": "array",
-                              "description": "Array of parameters for each test call"
-                            }
-                          }
-                        },
-                        "required": ["name", "calls"]
-                      }
-                    }
-                  },
-                  "required": ["weight", "tests"]
-                }
-              }
-            }
-          },
-          "required": ["weight", "subjects"]
-        }
-      },
-      "required": ["base"]
-    },
-    "feedback_schema": {
-      "type": "object",
-      "properties": {
-        "general": {
-          "type": "object",
-          "properties": {
-            "report_title": {
-              "type": "string",
-              "description": "Title for the feedback report"
-            },
-            "show_passed_tests": {
-              "type": "boolean",
-              "description": "Whether to show passed tests in the report"
-            },
-            "show_test_details": {
-              "type": "boolean",
-              "description": "Whether to show detailed test information"
-            }
-          }
-        },
-        "default": {
-          "type": "object",
-          "properties": {
-            "category_headers": {
-              "type": "object",
-              "patternProperties": {
-                "^[a-zA-Z_][a-zA-Z0-9_]*$": {
-                  "type": "string",
-                  "description": "Custom header for a category"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "setup_schema": {
-      "type": "object",
-      "properties": {
-        "runtime_image": {
-          "type": "string",
-          "description": "Docker image to use for execution",
-          "examples": ["node:18-alpine", "python:3.11-slim"]
-        },
-        "container_port": {
-          "type": "integer",
-          "minimum": 1,
-          "maximum": 65535,
-          "description": "Port number inside the container"
-        },
-        "start_command": {
-          "type": "string",
-          "description": "Command to start the application",
-          "examples": ["node server.js", "python app.py"]
-        },
-        "commands": {
-          "type": "object",
-          "properties": {
-            "install_dependencies": {
-              "type": "string",
-              "description": "Command to install dependencies",
-              "examples": ["npm install", "pip install -r requirements.txt"]
-            }
-          }
-        }
-      },
-      "required": ["runtime_image", "start_command"]
-    },
-    "response_schema": {
-      "type": "object",
-      "properties": {
-        "server_status": {
-          "type": "string",
-          "description": "Status of the server connection"
-        },
-        "autograding_status": {
-          "type": "string",
-          "enum": ["completed", "failed", "partial"],
-          "description": "Overall status of the autograding process"
-        },
-        "final_score": {
-          "type": "number",
-          "minimum": 0,
-          "maximum": 100,
-          "description": "Final calculated score"
-        },
-        "feedback": {
-          "type": "string",
-          "description": "Generated feedback text"
-        },
-        "test_report": {
-          "type": "array",
-          "items": {
-            "type": "object",
-            "properties": {
-              "name": {
-                "type": "string",
-                "description": "Test function name"
-              },
-              "score": {
-                "type": "number",
-                "minimum": 0,
-                "maximum": 100,
-                "description": "Score for this test"
-              },
-              "report": {
-                "type": "string",
-                "description": "Detailed test report"
-              },
-              "parameters": {
-                "type": "object",
-                "description": "Parameters used for the test"
-              }
-            },
-            "required": ["name", "score", "report"]
-          }
-        }
-      },
-      "required": ["server_status", "autograding_status", "final_score", "test_report"]
-    }
-  },
-  "examples": [
-    {
-      "template_preset": "web dev",
-      "student_name": "John Doe",
-      "student_credentials": "token-123",
-      "include_feedback": true,
-      "feedback_type": "default"
-    },
-    {
-      "template_preset": "api",
-      "student_name": "Jane Smith",
-      "student_credentials": "token-456",
-      "include_feedback": true,
-      "feedback_type": "default"
-    },
-    {
-      "template_preset": "custom",
-      "student_name": "Alice Williams",
-      "student_credentials": "token-789",
-      "include_feedback": true,
-      "feedback_type": "ai",
-      "openai_key": "sk-..."
-    }
-  ]
-}
diff --git a/tests/data/api_testing/criteria.json b/tests/data/api_testing/criteria.json
deleted file mode 100644
index 6ec1e78..0000000
--- a/tests/data/api_testing/criteria.json
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "api_endpoints": {
-        "weight": 50,
-        "tests": [
-          {
-            "name": "health_check",
-            "calls": [
-              ["/health"]
-            ]
-          },
-          {
-            "name": "check_response_json",
-            "calls": [
-              ["/api/users", "users", []],
-              ["/api/user/1", "id", 1],
-              ["/api/user/1", "name", "John Doe"]
-            ]
-          }
-        ]
-      },
-      "api_methods": {
-        "weight": 50,
-        "tests": [
-          {
-            "name": "check_post_request",
-            "calls": [
-              ["/api/users", {"name": "Jane Smith", "email": "jane@example.com"}, 201]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/tests/data/api_testing/feedback.json b/tests/data/api_testing/feedback.json
deleted file mode 100644
index d9921dd..0000000
--- a/tests/data/api_testing/feedback.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "general": {
-    "report_title": "API Testing Assignment Feedback",
-    "show_passed_tests": true,
-    "show_test_details": true
-  },
-  "default": {
-    "category_headers": {
-      "base": "API Testing Requirements",
-      "api_endpoints": "API Endpoints",
-      "api_methods": "HTTP Methods"
-    }
-  }
-}
diff --git a/tests/data/api_testing/package.json b/tests/data/api_testing/package.json
deleted file mode 100644
index 62900cf..0000000
--- a/tests/data/api_testing/package.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "name": "student-api",
-  "version": "1.0.0",
-  "description": "Student API assignment",
-  "main": "server.js",
-  "scripts": {
-    "start": "node server.js"
-  },
-  "dependencies": {
-    "express": "^4.18.2"
-  }
-}
diff --git a/tests/data/api_testing/server.js b/tests/data/api_testing/server.js
deleted file mode 100644
index e558f06..0000000
--- a/tests/data/api_testing/server.js
+++ /dev/null
@@ -1,56 +0,0 @@
-const express = require('express');
-const app = express();
-const port = process.env.PORT || 8000;
-
-app.use(express.json());
-
-// In-memory database
-let users = [
-    { id: 1, name: 'John Doe', email: 'john@example.com' },
-    { id: 2, name: 'Jane Smith', email: 'jane@example.com' }
-];
-
-// Health check endpoint
-app.get('/health', (req, res) => {
-    res.status(200).json({ status: 'ok', message: 'API is running' });
-});
-
-// Get all users
-app.get('/api/users', (req, res) => {
-    res.json({ users: users });
-});
-
-// Get single user
-app.get('/api/user/:id', (req, res) => {
-    const userId = parseInt(req.params.id);
-    const user = users.find(u => u.id === userId);
-    
-    if (user) {
-        res.json(user);
-    } else {
-        res.status(404).json({ error: 'User not found' });
-    }
-});
-
-// Create new user
-app.post('/api/users', (req, res) => {
-    const { name, email } = req.body;
-    
-    if (!name || !email) {
-        return res.status(400).json({ error: 'Name and email are required' });
-    }
-    
-    const newUser = {
-        id: users.length + 1,
-        name,
-        email
-    };
-    
-    users.push(newUser);
-    res.status(201).json(newUser);
-});
-
-// Listen on 0.0.0.0 to accept external connections
-app.listen(port, '0.0.0.0', () => {
-    console.log(`Server is running on port ${port}`);
-});
diff --git a/tests/data/api_testing/setup.json b/tests/data/api_testing/setup.json
deleted file mode 100644
index c2993d3..0000000
--- a/tests/data/api_testing/setup.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "runtime_image": "node:18-alpine",
-  "container_port": 8000,
-  "start_command": "node server.js",
-  "commands": {
-    "install_dependencies": "npm install"
-  }
-}
diff --git a/tests/data/curl_examples.sh b/tests/data/curl_examples.sh
deleted file mode 100644
index 55e0dbc..0000000
--- a/tests/data/curl_examples.sh
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/bin/bash
-# Autograder API Test Examples using cURL
-# ========================================
-# This script contains cURL commands to test the Autograder API
-
-# Set the base URL (change this to your API endpoint)
-BASE_URL="http://localhost:8000"
-
-# Colors for output
-GREEN='\033[0;32m'
-BLUE='\033[0;34m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Function to print headers
-print_header() {
-    echo -e "\n${BLUE}========================================${NC}"
-    echo -e "${BLUE}$1${NC}"
-    echo -e "${BLUE}========================================${NC}\n"
-}
-
-# Change to the tests/data directory
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-cd "$SCRIPT_DIR"
-
-# ========================================
-# Test 1: Web Development Template
-# ========================================
-test_web_dev() {
-    print_header "TEST 1: Web Development Template"
-    
-    curl -X POST "$BASE_URL/grade_submission/" \
-      -F "submission_files=@web_dev/index.html" \
-      -F "submission_files=@web_dev/style.css" \
-      -F "submission_files=@web_dev/script.js" \
-      -F "criteria_json=@web_dev/criteria.json" \
-      -F "feedback_json=@web_dev/feedback.json" \
-      -F "template_preset=web dev" \
-      -F "student_name=John Doe" \
-      -F "student_credentials=test-token-123" \
-      -F "include_feedback=true" \
-      -F "feedback_type=default" \
-      | jq '.'
-}
-
-# ========================================
-# Test 2: API Testing Template
-# ========================================
-test_api() {
-    print_header "TEST 2: API Testing Template"
-    
-    curl -X POST "$BASE_URL/grade_submission/" \
-      -F "submission_files=@api_testing/server.js" \
-      -F "submission_files=@api_testing/package.json" \
-      -F "criteria_json=@api_testing/criteria.json" \
-      -F "feedback_json=@api_testing/feedback.json" \
-      -F "setup_json=@api_testing/setup.json" \
-      -F "template_preset=api" \
-      -F "student_name=Jane Smith" \
-      -F "student_credentials=test-token-456" \
-      -F "include_feedback=true" \
-      -F "feedback_type=default" \
-      | jq '.'
-}
-
-# ========================================
-# Test 3: Input/Output Template
-# ========================================
-test_io() {
-    print_header "TEST 3: Input/Output Template"
-    
-    curl -X POST "$BASE_URL/grade_submission/" \
-      -F "submission_files=@input_output/calculator.py" \
-      -F "submission_files=@input_output/requirements.txt" \
-      -F "criteria_json=@input_output/criteria.json" \
-      -F "feedback_json=@input_output/feedback.json" \
-      -F "setup_json=@input_output/setup.json" \
-      -F "template_preset=io" \
-      -F "student_name=Bob Johnson" \
-      -F "student_credentials=test-token-789" \
-      -F "include_feedback=true" \
-      -F "feedback_type=default" \
-      | jq '.'
-}
-
-# ========================================
-# Test 4: Essay Template
-# ========================================
-test_essay() {
-    print_header "TEST 4: Essay Template"
-    
-    curl -X POST "$BASE_URL/grade_submission/" \
-      -F "submission_files=@essay/essay.txt" \
-      -F "criteria_json=@essay/criteria.json" \
-      -F "feedback_json=@essay/feedback.json" \
-      -F "template_preset=essay" \
-      -F "student_name=Eve Adams" \
-      -F "student_credentials=test-token-202" \
-      -F "include_feedback=true" \
-      -F "feedback_type=default" \
-      | jq '.'
-}
-
-# ========================================
-# Test 5: Custom Template
-# ========================================
-test_custom() {
-    print_header "TEST 5: Custom Template"
-    
-    curl -X POST "$BASE_URL/grade_submission/" \
-      -F "submission_files=@custom_template/main.py" \
-      -F "criteria_json=@custom_template/criteria.json" \
-      -F "feedback_json=@custom_template/feedback.json" \
-      -F "custom_template=@custom_template/custom_template.py" \
-      -F "template_preset=custom" \
-      -F "student_name=Alice Williams" \
-      -F "student_credentials=test-token-101" \
-      -F "include_feedback=true" \
-      -F "feedback_type=default" \
-      | jq '.'
-}
-
-# ========================================
-# Template Info - Web Dev
-# ========================================
-test_template_info_web() {
-    print_header "TEMPLATE INFO: Web Dev"
-    
-    curl -X GET "$BASE_URL/templates/webdev" | jq '.'
-}
-
-# ========================================
-# Template Info - API
-# ========================================
-test_template_info_api() {
-    print_header "TEMPLATE INFO: API"
-    
-    curl -X GET "$BASE_URL/templates/api" | jq '.'
-}
-
-# ========================================
-# Template Info - I/O
-# ========================================
-test_template_info_io() {
-    print_header "TEMPLATE INFO: I/O"
-    
-    curl -X GET "$BASE_URL/templates/io" | jq '.'
-}
-
-# ========================================
-# Template Info - Essay
-# ========================================
-test_template_info_essay() {
-    print_header "TEMPLATE INFO: Essay"
-    
-    curl -X GET "$BASE_URL/templates/essay" | jq '.'
-}
-
-# ========================================
-# Main Menu
-# ========================================
-show_menu() {
-    echo -e "\n${GREEN}Autograder API Test Suite - cURL Edition${NC}"
-    echo "========================================"
-    echo "Base URL: $BASE_URL"
-    echo ""
-    echo "1. Test Web Development Template"
-    echo "2. Test API Testing Template"
-    echo "3. Test Input/Output Template"
-    echo "4. Test Essay Template"
-    echo "5. Test Custom Template"
-    echo "6. Get Template Info - Web Dev"
-    echo "7. Get Template Info - API"
-    echo "8. Get Template Info - I/O"
-    echo "9. Get Template Info - Essay"
-    echo "10. Run All Tests"
-    echo "11. Change Base URL"
-    echo "0. Exit"
-    echo ""
-}
-
-# Run all tests
-run_all() {
-    test_web_dev
-    test_api
-    test_io
-    test_essay
-    test_custom
-    test_template_info_web
-    test_template_info_api
-    test_template_info_io
-    test_template_info_essay
-}
-
-# Main loop
-if [ "$1" = "--all" ]; then
-    run_all
-elif [ "$1" = "--web" ]; then
-    test_web_dev
-elif [ "$1" = "--api" ]; then
-    test_api
-elif [ "$1" = "--io" ]; then
-    test_io
-elif [ "$1" = "--essay" ]; then
-    test_essay
-elif [ "$1" = "--custom" ]; then
-    test_custom
-elif [ "$1" = "--url" ] && [ -n "$2" ]; then
-    BASE_URL="$2"
-    echo "Base URL set to: $BASE_URL"
-    run_all
-else
-    # Interactive mode
-    while true; do
-        show_menu
-        read -p "Select an option (0-11): " choice
-        
-        case $choice in
-            1) test_web_dev ;;
-            2) test_api ;;
-            3) test_io ;;
-            4) test_essay ;;
-            5) test_custom ;;
-            6) test_template_info_web ;;
-            7) test_template_info_api ;;
-            8) test_template_info_io ;;
-            9) test_template_info_essay ;;
-            10) run_all ;;
-            11) 
-                read -p "Enter new base URL: " new_url
-                BASE_URL="$new_url"
-                echo -e "${GREEN}Base URL updated to: $BASE_URL${NC}"
-                ;;
-            0) 
-                echo -e "\n${GREEN}Goodbye!${NC}\n"
-                exit 0
-                ;;
-            *)
-                echo -e "${RED}Invalid option. Please select 0-11.${NC}"
-                ;;
-        esac
-        
-        echo ""
-        read -p "Press Enter to continue..."
-    done
-fi
diff --git a/tests/data/custom_template/criteria.json b/tests/data/custom_template/criteria.json
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/custom_template/custom_template.py b/tests/data/custom_template/custom_template.py
deleted file mode 100644
index 23ce232..0000000
--- a/tests/data/custom_template/custom_template.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from autograder.builder.models.template import Template
-from autograder.builder.models.test_function import TestFunction
-from autograder.builder.models.param_description import ParamDescription
-from autograder.models.dataclass.test_result import TestResult
-from autograder.context import request_context
-
-
-class CheckFileExists(TestFunction):
-    """Test to check if a specific file exists in the submission."""
-    
-    @property
-    def name(self):
-        return "check_file_exists"
-    
-    @property
-    def description(self):
-        return "Checks if a specified file exists in the student submission."
-    
-    @property
-    def required_file(self):
-        return None
-    
-    @property
-    def parameter_description(self):
-        return [
-            ParamDescription("filename", "The name of the file to check for.", "string")
-        ]
-    
-    def execute(self, filename: str) -> TestResult:
-        request = request_context.get_request()
-        submission_files = request.submission_files
-
-        if filename in submission_files:
-            return TestResult(
-                self.name,
-                100,
-                f"File '{filename}' was found in the submission.",
-                parameters={"filename": filename}
-            )
-        else:
-            return TestResult(
-                self.name,
-                0,
-                f"File '{filename}' was NOT found in the submission.",
-                parameters={"filename": filename}
-            )
-
-
-class CheckFunctionExists(TestFunction):
-    """Test to check if a function is defined in a Python file."""
-    
-    @property
-    def name(self):
-        return "check_function_exists"
-    
-    @property
-    def description(self):
-        return "Checks if a function is defined in the main Python file."
-    
-    @property
-    def required_file(self):
-        return "PYTHON"
-    
-    @property
-    def parameter_description(self):
-        return [
-            ParamDescription("function_name", "The name of the function to check for.", "string")
-        ]
-    
-    def execute(self, python_content: str, function_name: str) -> TestResult:
-        if f"def {function_name}(" in python_content:
-            return TestResult(
-                self.name,
-                100,
-                f"Function '{function_name}()' was found in the code.",
-                parameters={"function_name": function_name}
-            )
-        else:
-            return TestResult(
-                self.name,
-                0,
-                f"Function '{function_name}()' was NOT found in the code.",
-                parameters={"function_name": function_name}
-            )
-
-
-class CustomTemplate(Template):
-    """A custom template for basic Python file checking."""
-    
-    @property
-    def template_name(self):
-        return "Custom Template"
-    
-    @property
-    def template_description(self):
-        return "A custom template for checking Python file structure."
-    
-    @property
-    def requires_pre_executed_tree(self) -> bool:
-        return False
-    
-    @property
-    def requires_execution_helper(self) -> bool:
-        return False
-    
-    def __init__(self, clean=False):
-        self.tests = {
-            "check_file_exists": CheckFileExists(),
-            "check_function_exists": CheckFunctionExists()
-        }
-    
-    def get_test(self, name: str) -> TestFunction:
-        test = self.tests.get(name)
-        if not test:
-            raise AttributeError(f"Test '{name}' not found in custom template.")
-        return test
diff --git a/tests/data/custom_template/feedback.json b/tests/data/custom_template/feedback.json
deleted file mode 100644
index 5629d00..0000000
--- a/tests/data/custom_template/feedback.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "general": {
-    "report_title": "Custom Template Assignment Feedback",
-    "show_passed_tests": true,
-    "show_test_details": true
-  },
-  "default": {
-    "category_headers": {
-      "base": "Custom Template Requirements"
-    }
-  }
-}
diff --git a/tests/data/custom_template/main.py b/tests/data/custom_template/main.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/essay/criteria.json b/tests/data/essay/criteria.json
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/essay/essay.txt b/tests/data/essay/essay.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/essay/feedback.json b/tests/data/essay/feedback.json
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/input_output/calculator.py b/tests/data/input_output/calculator.py
deleted file mode 100644
index 6be2dbd..0000000
--- a/tests/data/input_output/calculator.py
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple Calculator Program
-Reads operation and two numbers from stdin and outputs the result.
-"""
-
-import sys
-
-
-def main():
-    try:
-        # Read inputs from stdin
-        operation = input().strip()
-        num1 = float(input().strip())
-        num2 = float(input().strip())
-        
-        # Perform calculation based on operation
-        if operation == "add":
-            result = num1 + num2
-        elif operation == "subtract":
-            result = num1 - num2
-        elif operation == "multiply":
-            result = num1 * num2
-        elif operation == "divide":
-            if num2 == 0:
-                print("Error: Division by zero")
-                return
-            result = num1 / num2
-        else:
-            print(f"Error: Unknown operation '{operation}'")
-            return
-        
-        # Print result (integer if whole number, otherwise float)
-        if result == int(result):
-            print(int(result))
-        else:
-            print(result)
-            
-    except ValueError:
-        print("Error: Invalid input")
-    except EOFError:
-        print("Error: Unexpected end of input")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/data/input_output/criteria.json b/tests/data/input_output/criteria.json
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/input_output/feedback.json b/tests/data/input_output/feedback.json
deleted file mode 100644
index 9088486..0000000
--- a/tests/data/input_output/feedback.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "general": {
-    "report_title": "Calculator Assignment Feedback",
-    "show_passed_tests": true,
-    "show_test_details": true
-  },
-  "default": {
-    "category_headers": {
-      "base": "Calculator Requirements",
-      "basic_operations": "Basic Operations",
-      "edge_cases": "Edge Cases & Special Scenarios"
-    }
-  }
-}
diff --git a/tests/data/input_output/requirements.txt b/tests/data/input_output/requirements.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/input_output/setup.json b/tests/data/input_output/setup.json
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/data/web_dev/criteria.json b/tests/data/web_dev/criteria.json
deleted file mode 100644
index ab144a7..0000000
--- a/tests/data/web_dev/criteria.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-  "base": {
-    "weight": 100,
-    "subjects": {
-      "html_structure": {
-        "weight": 40,
-        "tests":
-        [
-          {
-            "name": "has_tag",
-            "file": "index.html",
-            "calls": [
-              ["div", 5],
-              ["h1", 2],
-              ["p", 3],
-              ["a", 2]
-            ]
-          }
-        ]
-      },
-      "css_styling": {
-        "weight": 30,
-        "tests": [
-          {
-            "name": "has_class",
-            "file": "index.html",
-            "calls": [
-              [["container", "row", "col-*"], 10]
-            ]
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/tests/data/web_dev/feedback.json b/tests/data/web_dev/feedback.json
deleted file mode 100644
index 0e1806d..0000000
--- a/tests/data/web_dev/feedback.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "general": {
-    "report_title": "Web Development Assignment Feedback",
-    "show_passed_tests": true,
-    "show_test_details": true
-  },
-  "default": {
-    "category_headers": {
-      "base": "Core Web Development Requirements",
-      "html_structure": "HTML Structure & Semantics",
-      "css_styling": "CSS Styling & Design",
-      "javascript_functionality": "JavaScript Functionality"
-    }
-  }
-}
diff --git a/tests/data/web_dev/index.html b/tests/data/web_dev/index.html
deleted file mode 100644
index c5207ea..0000000
--- a/tests/data/web_dev/index.html
+++ /dev/null
@@ -1,39 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Student Portfolio</title>
-    <link rel="stylesheet" href="style.css">
-</head>
-<body>
-    <header class="container">
-        <h1>John Doe - Portfolio</h1>
-        <nav>
-            <a href="#about">About</a>
-            <a href="#projects">Projects</a>
-        </nav>
-    </header>
-
-    <div class="container row">
-        <div class="col-md-6">
-            <h1>Welcome</h1>
-            <p>This is my portfolio website showcasing my work.</p>
-            <p>I'm a passionate developer with experience in web technologies.</p>
-            <p>Check out my projects below!</p>
-        </div>
-        <div class="col-md-6">
-            <div class="card">
-                <h2>About Me</h2>
-                <p>I love coding and creating amazing web experiences.</p>
-            </div>
-        </div>
-    </div>
-
-    <footer class="container">
-        <p>&copy; 2024 John Doe</p>
-    </footer>
-
-    <script src="script.js"></script>
-</body>
-</html>
diff --git a/tests/data/web_dev/script.js b/tests/data/web_dev/script.js
deleted file mode 100644
index 68ea57c..0000000
--- a/tests/data/web_dev/script.js
+++ /dev/null
@@ -1,33 +0,0 @@
-// Interactive features for the portfolio
-
-document.addEventListener('DOMContentLoaded', function() {
-    console.log('Portfolio loaded successfully!');
-    
-    // Add smooth scrolling to navigation links
-    const navLinks = document.querySelectorAll('nav a');
-    
-    navLinks.forEach(link => {
-        link.addEventListener('click', function(e) {
-            e.preventDefault();
-            const targetId = this.getAttribute('href');
-            const targetElement = document.querySelector(targetId);
-            
-            if (targetElement) {
-                targetElement.scrollIntoView({ behavior: 'smooth' });
-            }
-        });
-    });
-    
-    // Add hover effect to cards
-    const cards = document.querySelectorAll('.card');
-    cards.forEach(card => {
-        card.addEventListener('mouseenter', function() {
-            this.style.transform = 'scale(1.05)';
-            this.style.transition = 'transform 0.3s ease';
-        });
-        
-        card.addEventListener('mouseleave', function() {
-            this.style.transform = 'scale(1)';
-        });
-    });
-});
diff --git a/tests/data/web_dev/style.css b/tests/data/web_dev/style.css
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/playroom.py b/tests/playroom.py
deleted file mode 100644
index cf9e339..0000000
--- a/tests/playroom.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-DEPRECATED: This file has been moved to tests/playroom/ directory.
-
-The playroom functionality has been refactored into multiple template-specific
-playrooms for better organization and testing coverage.
-
-Please use one of the following:
-- tests/playroom/webdev_playroom.py - Web Development template
-- tests/playroom/api_playroom.py - API Testing template
-- tests/playroom/essay_playroom.py - Essay Grading template
-- tests/playroom/io_playroom.py - Input/Output template
-- tests/playroom/run_all_playrooms.py - Run all playrooms
-
-Usage:
-    python -m tests.playroom.webdev_playroom
-    python -m tests.playroom.run_all_playrooms
-    python -m tests.playroom.run_all_playrooms webdev
-
-See tests/playroom/README.md for full documentation.
-"""
-
-import sys
-import os
-
-# Add a deprecation warning
-print("\n" + "!" * 70)
-print("WARNING: This file is deprecated!")
-print("!" * 70)
-print("\nPlayrooms have been refactored into separate template-specific files.")
-print("Please use the new playroom directory structure:\n")
-print("  - tests/playroom/webdev_playroom.py")
-print("  - tests/playroom/api_playroom.py")
-print("  - tests/playroom/essay_playroom.py")
-print("  - tests/playroom/io_playroom.py")
-print("  - tests/playroom/run_all_playrooms.py")
-print("\nFor backward compatibility, running the webdev playroom...\n")
-print("!" * 70 + "\n")
-
-# For backward compatibility, run the webdev playroom
-try:
-    from tests.playroom.webdev_playroom import run_webdev_playroom
-    run_webdev_playroom()
-except ImportError:
-    print("ERROR: Could not import new playroom structure.")
-    print("Please run from project root: python -m tests.playroom.webdev_playroom")
-    sys.exit(1)
diff --git a/tests/playroom/README.md b/tests/playroom/README.md
deleted file mode 100644
index 8d0006e..0000000
--- a/tests/playroom/README.md
+++ /dev/null
@@ -1,286 +0,0 @@
-# Autograder Playrooms
-
-Welcome to the Autograder Playrooms! This directory contains comprehensive test environments for each grading template, allowing you to fully mock and test grading operations end-to-end.
-
-## Overview
-
-Each playroom provides a complete grading scenario including:
-- **Submission Files**: Realistic student code/content submissions
-- **Setup Configuration**: Docker/sandbox environment setup when needed
-- **Criteria Configuration**: Test functions and grading criteria
-- **Feedback Preferences**: Customized feedback settings
-- **Full Execution**: Complete autograder workflow from submission to final report
-
-## Available Playrooms
-
-### 1. Web Development (`webdev_playroom.py`)
-Tests HTML/CSS grading capabilities with Bootstrap integration.
-
-**Features:**
-- HTML file with Bootstrap framework
-- CSS class detection tests
-- Bootstrap component validation
-- Custom styling checks
-
-**Run:**
-```bash
-python -m tests.playroom.webdev_playroom
-```
-
-**Requirements:** None (no Docker needed)
-
----
-
-### 2. API Testing (`api_playroom.py`)
-Tests REST API endpoint validation in a containerized environment.
-
-**Features:**
-- Flask API with multiple endpoints
-- Docker containerization
-- Health check testing
-- GET/POST endpoint validation
-- JSON response verification
-
-**Run:**
-```bash
-python -m tests.playroom.api_playroom
-```
-
-**Requirements:** Docker must be running
-
----
-
-### 3. Essay Grading (`essay_playroom.py`)
-Tests AI-powered essay evaluation capabilities.
-
-**Features:**
-- Sample essay submission
-- AI-based criteria (clarity, grammar, argument strength)
-- Thesis statement evaluation
-- Adherence to prompt checking
-
-**Run:**
-```bash
-export OPENAI_API_KEY='your-key-here'
-python -m tests.playroom.essay_playroom
-```
-
-**Requirements:** OpenAI API key set in environment
-
----
-
-### 4. Input/Output (`io_playroom.py`)
-Tests command-line program validation with stdin/stdout testing.
-
-**Features:**
-- Python calculator program
-- Multiple input/output test cases
-- Stdin input injection
-- Stdout output validation
-- Docker containerized execution
-
-**Run:**
-```bash
-python -m tests.playroom.io_playroom
-```
-
-**Requirements:** Docker must be running
-
----
-
-## Running Playrooms
-
-### Run Individual Playroom
-```bash
-# Run a specific playroom
-python -m tests.playroom.webdev_playroom
-python -m tests.playroom.api_playroom
-python -m tests.playroom.essay_playroom
-python -m tests.playroom.io_playroom
-```
-
-### Run Multiple Playrooms
-```bash
-# Run all playrooms
-python -m tests.playroom.run_all_playrooms
-
-# Run specific playrooms
-python -m tests.playroom.run_all_playrooms webdev io
-
-# Run multiple playrooms
-python -m tests.playroom.run_all_playrooms api essay
-```
-
-### List Available Playrooms
-```bash
-python -m tests.playroom.run_all_playrooms --list
-```
-
-## Playroom Structure
-
-Each playroom follows a consistent structure:
-
-```python
-def create_submission():
-    """Create mock submission files"""
-    return {...}
-
-def create_setup_config():
-    """Create sandbox/Docker setup if needed"""
-    return {...}
-
-def create_criteria_config():
-    """Define grading criteria and test functions"""
-    return {...}
-
-def create_feedback_config():
-    """Configure feedback preferences"""
-    return {...}
-
-def run_[template]_playroom():
-    """Execute the complete grading workflow"""
-    # 1. Create submission files
-    # 2. Setup configuration
-    # 3. Build autograder request
-    # 4. Execute grading
-    # 5. Display results
-```
-
-## What Gets Tested
-
-### For Each Playroom:
-1. **File Loading**: Submission files are properly loaded
-2. **Template Selection**: Correct template is initialized
-3. **Criteria Building**: Criteria tree is constructed from config
-4. **Test Execution**: All test functions run successfully
-5. **Scoring**: Weighted scores are calculated correctly
-6. **Feedback Generation**: Feedback is generated based on preferences
-7. **Response Format**: Final response matches expected structure
-
-## Customizing Playrooms
-
-You can modify playrooms to test specific scenarios:
-
-### Change Submission Content
-```python
-def create_html_submission():
-    return """<html>Your custom HTML here</html>"""
-```
-
-### Modify Criteria Weights
-```python
-def create_criteria_config():
-    return {
-        "Test Name": {
-            "weight": 50,  # Adjust weight
-            "test": "test_function_name",
-            "parameters": {...}
-        }
-    }
-```
-
-### Adjust Feedback Settings
-```python
-def create_feedback_config():
-    return {
-        "tone": "encouraging",  # or "professional", "constructive"
-        "detail_level": "detailed",  # or "brief", "comprehensive"
-        "include_suggestions": True
-    }
-```
-
-## Common Issues
-
-### Docker Not Running
-**Symptoms:** API or I/O playrooms fail with connection errors
-
-**Solution:**
-```bash
-# Check Docker status
-docker ps
-
-# Start Docker if needed
-sudo systemctl start docker  # Linux
-# or open Docker Desktop on Mac/Windows
-```
-
-### Missing OpenAI API Key
-**Symptoms:** Essay playroom exits with warning
-
-**Solution:**
-```bash
-export OPENAI_API_KEY='sk-your-key-here'
-```
-
-### Module Import Errors
-**Symptoms:** Cannot import autograder modules
-
-**Solution:**
-```bash
-# Run from project root
-cd /path/to/autograder
-python -m tests.playroom.webdev_playroom
-```
-
-## Development Tips
-
-### Adding a New Playroom
-
-1. Create a new file: `tests/playroom/my_template_playroom.py`
-2. Follow the existing structure
-3. Add to `run_all_playrooms.py` PLAYROOMS dict:
-```python
-PLAYROOMS = {
-    "mytemplate": {
-        "name": "My Template",
-        "runner": run_mytemplate_playroom,
-        "description": "Description here"
-    }
-}
-```
-
-### Testing Changes
-
-Use playrooms to quickly test autograder changes:
-1. Make changes to autograder code
-2. Run relevant playroom
-3. Check output for expected behavior
-
-### Debugging
-
-Add debug logging to playrooms:
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
-## Architecture
-
-```
-tests/playroom/
-├── __init__.py
-├── README.md                  # This file
-├── webdev_playroom.py        # Web development tests
-├── api_playroom.py           # API testing tests
-├── essay_playroom.py         # Essay grading tests
-├── io_playroom.py            # I/O testing tests
-└── run_all_playrooms.py      # Runner for all playrooms
-```
-
-## Contributing
-
-When adding new templates to the autograder:
-1. Create a corresponding playroom
-2. Include realistic submission examples
-3. Test all template features
-4. Document any special requirements
-5. Add to run_all_playrooms.py
-
-## License
-
-Same as parent project.
-
-## Questions?
-
-See main project documentation or contact the maintainers.
-
diff --git a/tests/playroom/__init__.py b/tests/playroom/__init__.py
deleted file mode 100644
index 017c4a4..0000000
--- a/tests/playroom/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Playroom package for testing the Autograder system.
-
-Each playroom module simulates a complete grading workflow for a specific template:
-- webdev_playroom.py: Tests the web development template
-- essay_playroom.py: Tests the essay grading template
-- api_playroom.py: Tests the API testing template
-- io_playroom.py: Tests the input/output template
-"""
-
diff --git a/tests/playroom/api_playroom.py b/tests/playroom/api_playroom.py
deleted file mode 100644
index 695ff15..0000000
--- a/tests/playroom/api_playroom.py
+++ /dev/null
@@ -1,250 +0,0 @@
-"""
-API Testing Template Playroom
-
-This playroom demonstrates a complete grading operation for the API testing template.
-It includes:
-- Flask API submission files
-- Dockerfile for containerization
-- Setup configuration for sandbox execution
-- Criteria configuration with API test functions
-- Full mock grading execution
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.autograder_facade import Autograder
-
-
-def create_api_submission():
-    """Create a sample Flask API submission."""
-    return """from flask import Flask, jsonify, request
-
-app = Flask(__name__)
-
-# In-memory data store
-users = [
-    {"id": 1, "name": "Alice", "email": "alice@example.com"},
-    {"id": 2, "name": "Bob", "email": "bob@example.com"}
-]
-
-@app.route('/health', methods=['GET'])
-def health_check():
-    return jsonify({"status": "healthy"}), 200
-
-@app.route('/api/users', methods=['GET'])
-def get_users():
-    return jsonify(users), 200
-
-@app.route('/api/users/<int:user_id>', methods=['GET'])
-def get_user(user_id):
-    user = next((u for u in users if u["id"] == user_id), None)
-    if user:
-        return jsonify(user), 200
-    return jsonify({"error": "User not found"}), 404
-
-@app.route('/api/users', methods=['POST'])
-def create_user():
-    data = request.get_json()
-    new_user = {
-        "id": len(users) + 1,
-        "name": data.get("name"),
-        "email": data.get("email")
-    }
-    users.append(new_user)
-    return jsonify(new_user), 201
-
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5000, debug=True)
-"""
-
-
-def create_dockerfile():
-    """Create a Dockerfile for the API."""
-    return """FROM python:3.9-slim
-
-WORKDIR /app
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY app.py .
-
-EXPOSE 5000
-
-CMD ["python", "app.py"]
-"""
-
-
-def create_requirements_txt():
-    """Create requirements file for the API."""
-    return """Flask==2.3.0
-Werkzeug==2.3.0
-"""
-
-
-def create_setup_config():
-    """Create setup configuration for API testing."""
-    return {
-        "runtime_image": "python:3.9-slim",
-        "container_port": 5000,
-        "start_command": "python app.py",
-        "commands": {
-            "install_dependencies": "pip install Flask==2.3.0 Werkzeug==2.3.0"
-        }
-    }
-
-
-def create_criteria_config():
-    """Create criteria configuration for API grading."""
-    return {
-        "base": {
-            "weight": 100,
-            "subjects": {
-                "API Endpoints": {
-                    "weight": 100,
-                    "subjects": {
-                        "Health Check": {
-                            "weight": 30,
-                            "tests": [
-                                {
-                                    "name": "health_check",
-                                    "calls": [
-                                        ["/health"]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Get All Users": {
-                            "weight": 35,
-                            "tests": [
-                                {
-                                    "name": "check_response_json",
-                                    "calls": [
-                                        ["/api/users", "0", {"id": 1}]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Get Single User": {
-                            "weight": 35,
-                            "tests": [
-                                {
-                                    "name": "check_response_json",
-                                    "calls": [
-                                        ["/api/users/1", "id", 1],
-                                        ["/api/users/1", "name", "Alice"]
-                                    ]
-                                }
-                            ]
-                        }
-                    }
-                }
-            }
-        },
-        "bonus": {
-            "weight": 20,
-            "subjects": {
-                "Advanced Features": {
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "name": "check_response_json",
-                            "calls": [
-                                ["/api/users/2", "email", "bob@example.com"]
-                            ]
-                        }
-                    ]
-                }
-            }
-        },
-        "penalty": {
-            "weight": 10
-        }
-    }
-
-
-def create_feedback_config():
-    """Create feedback preferences for the grading."""
-    return {
-        "general": {
-            "report_title": "Relatório de Avaliação - API REST",
-            "show_score": True,
-            "show_passed_tests": False,
-            "add_report_summary": True
-        },
-        "ai": {
-            "provide_solutions": "hint",
-            "feedback_tone": "professional",
-            "feedback_persona": "Senior Backend Developer",
-            "assignment_context": "Este é um teste de API REST com Flask."
-        },
-        "default": {
-            "category_headers": {
-                "base": "✅ Requisitos Essenciais",
-                "bonus": "⭐ Pontos Extras",
-                "penalty": "❌ Pontos a Melhorar"
-            }
-        }
-    }
-
-
-def run_api_playroom():
-    """Execute the API testing playroom."""
-    print("\n" + "="*70)
-    print("API TESTING TEMPLATE PLAYROOM")
-    print("="*70 + "\n")
-
-    # Create submission files
-    print("📄 Creating API submission files...")
-    submission_files = {
-        "app.py": create_api_submission()
-    }
-
-    # Create assignment configuration
-    print("⚙️  Setting up assignment configuration...")
-    assignment_config = AssignmentConfig(
-        template="api",
-        criteria=create_criteria_config(),
-        feedback=create_feedback_config(),
-        setup=create_setup_config()
-    )
-
-    # Create autograder request
-    print("📋 Building autograder request...")
-    request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="Jane Smith",
-        include_feedback=True,
-        feedback_mode="default"
-    )
-
-    # Execute grading
-    print("🚀 Starting grading process...")
-    print("⚠️  Note: This requires Docker to be running and may take a few minutes")
-    print("-"*70)
-    result = Autograder.grade(request)
-    print("-"*70)
-
-    # Display results
-    print("\n" + "="*70)
-    print("GRADING RESULTS")
-    print("="*70)
-    print(f"\n✅ Status: {result.status}")
-    print(f"📊 Final Score: {result.final_score}/100")
-    print(f"\n📝 Feedback:\n{result.feedback}")
-    print(f"\n📈 Test Report:\n{result.test_report}")
-    print("\n" + "="*70 + "\n")
-
-
-if __name__ == "__main__":
-    run_api_playroom()
-
diff --git a/tests/playroom/essay_playroom.py b/tests/playroom/essay_playroom.py
deleted file mode 100644
index d1647d1..0000000
--- a/tests/playroom/essay_playroom.py
+++ /dev/null
@@ -1,287 +0,0 @@
-"""
-Essay Grading Template Playroom
-
-This playroom demonstrates a complete grading operation for the essay grading template.
-It includes:
-- Essay submission file
-- AI-based criteria configuration
-- Feedback preferences
-- Full mock grading execution with OpenAI integration
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.autograder_facade import Autograder
-
-
-def create_essay_submission():
-    """Create a sample essay submission."""
-    return """The Impact of Artificial Intelligence on Modern Education
-
-Introduction
-
-Artificial intelligence (AI) has emerged as a transformative force in numerous sectors, 
-and education is no exception. This essay explores how AI is reshaping the educational 
-landscape, examining both its benefits and challenges. The integration of AI technologies 
-in classrooms represents a fundamental shift in how we approach teaching and learning.
-
-The Promise of Personalized Learning
-
-One of the most significant advantages of AI in education is its ability to provide 
-personalized learning experiences. Traditional classroom settings often struggle to 
-accommodate the diverse learning paces and styles of individual students. AI-powered 
-adaptive learning systems can analyze student performance in real-time and adjust the 
-difficulty and presentation of material accordingly. This ensures that each student 
-receives instruction tailored to their specific needs, maximizing engagement and 
-comprehension.
-
-Moreover, AI tutoring systems can provide immediate feedback, something that would be 
-impossible for a single human instructor managing a large class. These systems can 
-identify when a student is struggling with a particular concept and offer additional 
-resources or alternative explanations. This level of individualized attention can 
-significantly improve learning outcomes.
-
-Administrative Efficiency and Teacher Support
-
-Beyond direct student interaction, AI is proving valuable in reducing the administrative 
-burden on educators. Automated grading systems can handle routine assessments, freeing 
-teachers to focus on more complex pedagogical tasks. AI can also assist in curriculum 
-planning, identifying gaps in course content and suggesting improvements based on 
-student performance data.
-
-However, it is crucial to note that AI should augment, not replace, human teachers. 
-The emotional intelligence, creativity, and nuanced understanding that experienced 
-educators bring to the classroom remain irreplaceable.
-
-Challenges and Ethical Considerations
-
-Despite its potential, the integration of AI in education raises important concerns. 
-Data privacy is paramount, as these systems collect vast amounts of information about 
-students' learning patterns and behaviors. There are also valid concerns about 
-algorithmic bias, where AI systems might inadvertently perpetuate existing inequalities 
-if trained on biased data.
-
-Additionally, there is the question of accessibility. Not all educational institutions 
-have the resources to implement sophisticated AI systems, potentially widening the gap 
-between well-funded and under-resourced schools.
-
-Conclusion
-
-Artificial intelligence holds tremendous promise for transforming education, offering 
-personalized learning experiences and supporting teachers in their work. However, its 
-implementation must be thoughtful and equitable, addressing concerns about privacy, 
-bias, and accessibility. As we move forward, the goal should be to harness AI's 
-capabilities while preserving the irreplaceable human elements of education. The future 
-of education likely lies not in AI replacing teachers, but in a collaborative model 
-where technology and human expertise work together to create the best possible learning 
-environment for all students.
-"""
-
-
-def create_criteria_config():
-    """Create criteria configuration for essay grading."""
-    return {
-        "base": {
-            "weight": 100,
-            "subjects": {
-                "Writing Quality": {
-                    "weight": 40,
-                    "subjects": {
-                        "Clarity and Cohesion": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "clarity_and_cohesion"
-                                }
-                            ]
-                        },
-                        "Grammar and Spelling": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "grammar_and_spelling"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "Content": {
-                    "weight": 60,
-                    "subjects": {
-                        "Thesis Statement": {
-                            "weight": 30,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "thesis_statement"
-                                }
-                            ]
-                        },
-                        "Argument Strength": {
-                            "weight": 40,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "argument_strength"
-                                }
-                            ]
-                        },
-                        "Adherence to Prompt": {
-                            "weight": 30,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "adherence_to_prompt",
-                                    "calls": [
-                                        ["Discuss the impact of artificial intelligence on modern education, including both benefits and challenges"]
-                                    ]
-                                }
-                            ]
-                        }
-                    }
-                }
-            }
-        },
-        "bonus": {
-            "weight": 20,
-            "subjects": {
-                "Advanced Elements": {
-                    "weight": 100,
-                    "subjects": {
-                        "Counterargument Handling": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "counterargument_handling"
-                                }
-                            ]
-                        },
-                        "Evidence Quality": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "essay.txt",
-                                    "name": "evidence_quality"
-                                }
-                            ]
-                        }
-                    }
-                }
-            }
-        },
-        "penalty": {
-            "weight": 10,
-            "subjects": {
-                "Issues": {
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "file": "essay.txt",
-                            "name": "logical_fallacy_check"
-                        }
-                    ]
-                }
-            }
-        }
-    }
-
-
-def create_feedback_config():
-    """Create feedback preferences for the grading."""
-    return {
-        "general": {
-            "report_title": "Relatório de Avaliação - Redação sobre IA na Educação",
-            "show_score": True,
-            "show_passed_tests": False,
-            "add_report_summary": True
-        },
-        "ai": {
-            "provide_solutions": "detailed",
-            "feedback_tone": "constructive and encouraging",
-            "feedback_persona": "Essay Writing Coach",
-            "assignment_context": "Este é um ensaio argumentativo sobre o impacto da IA na educação moderna.",
-            "extra_orientations": "Forneça sugestões específicas para melhorar a estrutura dos argumentos e a qualidade das evidências."
-        },
-        "default": {
-            "category_headers": {
-                "base": "✅ Requisitos Essenciais",
-                "bonus": "⭐ Elementos Avançados",
-                "penalty": "❌ Problemas Identificados"
-            }
-        }
-    }
-
-
-def run_essay_playroom():
-    """Execute the essay grading playroom."""
-    print("\n" + "="*70)
-    print("ESSAY GRADING TEMPLATE PLAYROOM")
-    print("="*70 + "\n")
-
-    # Check for OpenAI API key
-    openai_key = os.environ.get("OPENAI_API_KEY")
-    if not openai_key:
-        print("⚠️  WARNING: OPENAI_API_KEY not found in environment variables")
-        print("   Essay grading requires OpenAI API access")
-        print("   Please set OPENAI_API_KEY environment variable to run this playroom")
-        print("\n   Example: export OPENAI_API_KEY='your-key-here'\n")
-        return
-
-    # Create submission files
-    print("📄 Creating essay submission...")
-    submission_files = {
-        "essay.txt": create_essay_submission()
-    }
-
-    # Create assignment configuration
-    print("⚙️  Setting up assignment configuration...")
-    assignment_config = AssignmentConfig(
-        template="essay",
-        criteria=create_criteria_config(),
-        feedback=create_feedback_config(),
-        setup={}
-    )
-
-    # Create autograder request
-    print("📋 Building autograder request...")
-    request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="Alex Johnson",
-        include_feedback=True,
-        feedback_mode="ai",
-        openai_key=openai_key
-    )
-
-    # Execute grading
-    print("🚀 Starting grading process...")
-    print("⚠️  Note: This will make API calls to OpenAI and may take a minute")
-    print("-"*70)
-    result = Autograder.grade(request)
-    print("-"*70)
-
-    # Display results
-    print("\n" + "="*70)
-    print("GRADING RESULTS")
-    print("="*70)
-    print(f"\n✅ Status: {result.status}")
-    print(f"📊 Final Score: {result.final_score}/100")
-    print(f"\n📝 Feedback:\n{result.feedback}")
-    print(f"\n📈 Test Report:\n{result.test_report}")
-    print("\n" + "="*70 + "\n")
-
-
-if __name__ == "__main__":
-    run_essay_playroom()
-
diff --git a/tests/playroom/io_playroom.py b/tests/playroom/io_playroom.py
deleted file mode 100644
index 0645d80..0000000
--- a/tests/playroom/io_playroom.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-Input/Output Template Playroom
-
-This playroom demonstrates a complete grading operation for the input/output template.
-It includes:
-- Python program submission that accepts stdin input
-- Dockerfile for containerized execution
-- Setup configuration for sandbox execution
-- Criteria configuration with I/O test functions
-- Full mock grading execution
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.autograder_facade import Autograder
-
-
-def create_calculator_submission():
-    """Create a sample Python calculator program that accepts input."""
-    return """#!/usr/bin/env python3
-# Simple Calculator Program
-
-def main():
-    print("Simple Calculator")
-    print("Enter first number:")
-    num1 = float(input())
-    
-    print("Enter operation (+, -, *, /):")
-    operation = input().strip()
-    
-    print("Enter second number:")
-    num2 = float(input())
-    
-    if operation == '+':
-        result = num1 + num2
-    elif operation == '-':
-        result = num1 - num2
-    elif operation == '*':
-        result = num1 * num2
-    elif operation == '/':
-        if num2 != 0:
-            result = num1 / num2
-        else:
-            print("Error: Division by zero")
-            return
-    else:
-        print("Error: Invalid operation")
-        return
-    
-    print(f"Result: {result}")
-
-if __name__ == "__main__":
-    main()
-"""
-
-
-def create_setup_config():
-    """Create setup configuration for I/O testing."""
-    return {
-        "runtime_image": "python:3.9-slim",
-        "container_port": None,  # No port mapping needed for I/O testing
-        "execution_timeout": 10,
-        "start_command": "python3 calculator.py"
-    }
-
-
-def create_criteria_config():
-    """Create criteria configuration for I/O grading."""
-    return {
-        "base": {
-            "weight": 100,
-            "subjects": {
-                "Basic Operations": {
-                    "weight": 100,
-                    "subjects": {
-                        "Addition": {
-                            "weight": 25,
-                            "tests": [
-                                {
-                                    "name": "expect_output",
-                                    "calls": [
-                                        [["10", "+", "5"], "Result: 15.0"]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Subtraction": {
-                            "weight": 25,
-                            "tests": [
-                                {
-                                    "name": "expect_output",
-                                    "calls": [
-                                        [["20", "-", "8"], "Result: 12.0"]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Multiplication": {
-                            "weight": 25,
-                            "tests": [
-                                {
-                                    "name": "expect_output",
-                                    "calls": [
-                                        [["6", "*", "7"], "Result: 42.0"]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Division": {
-                            "weight": 25,
-                            "tests": [
-                                {
-                                    "name": "expect_output",
-                                    "calls": [
-                                        [["100", "/", "4"], "Result: 25.0"]
-                                    ]
-                                }
-                            ]
-                        }
-                    }
-                }
-            }
-        },
-        "bonus": {
-            "weight": 20,
-            "subjects": {
-                "Error Handling": {
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "name": "expect_output",
-                            "calls": [
-                                [["10", "/", "0"], "Error: Division by zero"]
-                            ]
-                        }
-                    ]
-                }
-            }
-        }
-    }
-
-
-def create_feedback_config():
-    """Create feedback preferences for the grading."""
-    return {
-        "general": {
-            "report_title": "Relatório de Avaliação - Calculadora",
-            "show_score": True,
-            "show_passed_tests": False,
-            "add_report_summary": True
-        },
-        "ai": {
-            "provide_solutions": "hint",
-            "feedback_tone": "encouraging but direct",
-            "feedback_persona": "Code Buddy",
-            "assignment_context": "Este é um teste de programa interativo com entrada/saída."
-        },
-        "default": {
-            "category_headers": {
-                "base": "✅ Requisitos Essenciais",
-                "bonus": "⭐ Pontos Extras"
-            }
-        }
-    }
-
-
-def run_io_playroom():
-    """Execute the input/output playroom."""
-    print("\n" + "="*70)
-    print("INPUT/OUTPUT TEMPLATE PLAYROOM")
-    print("="*70 + "\n")
-
-    # Create submission files
-    print("📄 Creating Python calculator submission...")
-    submission_files = {
-        "calculator.py": create_calculator_submission()
-    }
-
-    # Create assignment configuration
-    print("⚙️  Setting up assignment configuration...")
-    assignment_config = AssignmentConfig(
-        template="io",
-        criteria=create_criteria_config(),
-        feedback=create_feedback_config(),
-        setup=create_setup_config()
-    )
-
-    # Create autograder request
-    print("📋 Building autograder request...")
-    request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="Sam Wilson",
-        include_feedback=True,
-        feedback_mode="default"
-    )
-
-    # Execute grading
-    print("🚀 Starting grading process...")
-    print("⚠️  Note: This requires Docker to be running")
-    print("-"*70)
-    result = Autograder.grade(request)
-    print("-"*70)
-
-    # Display results
-    print("\n" + "="*70)
-    print("GRADING RESULTS")
-    print("="*70)
-    print(f"\n✅ Status: {result.status}")
-    print(f"📊 Final Score: {result.final_score}/100")
-    print(f"\n📝 Feedback:\n{result.feedback}")
-    print(f"\n📈 Test Report:\n{result.test_report}")
-    print("\n" + "="*70 + "\n")
-
-
-if __name__ == "__main__":
-    run_io_playroom()
-
diff --git a/tests/playroom/run_all_playrooms.py b/tests/playroom/run_all_playrooms.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/playroom/webdev_playroom.py b/tests/playroom/webdev_playroom.py
deleted file mode 100644
index 8eac1e8..0000000
--- a/tests/playroom/webdev_playroom.py
+++ /dev/null
@@ -1,241 +0,0 @@
-"""
-Web Development Template Playroom
-
-This playroom demonstrates a complete grading operation for the web development template.
-It includes:
-- HTML submission files with Bootstrap and CSS classes
-- Criteria configuration with multiple test functions
-- Feedback preferences
-- Full mock grading execution
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from connectors.models.autograder_request import AutograderRequest
-from connectors.models.assignment_config import AssignmentConfig
-from autograder.autograder_facade import Autograder
-
-
-def create_html_submission():
-    """Create a sample HTML submission with Bootstrap and CSS classes."""
-    return """<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Student Portfolio</title>
-    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
-    <style>
-        .custom-header { background-color: #f8f9fa; }
-        .custom-card { border-radius: 10px; }
-    </style>
-</head>
-<body>
-    <header class="custom-header p-4">
-        <h1 class="display-4">Welcome to My Portfolio</h1>
-        <p class="lead">A showcase of my work</p>
-    </header>
-    
-    <div class="container mt-5">
-        <div class="row">
-            <div class="col-md-4">
-                <div class="card custom-card mb-4">
-                    <div class="card-body">
-                        <h5 class="card-title">Project 1</h5>
-                        <p class="card-text">Description of project 1</p>
-                    </div>
-                </div>
-            </div>
-            <div class="col-md-4">
-                <div class="card custom-card mb-4">
-                    <div class="card-body">
-                        <h5 class="card-title">Project 2</h5>
-                        <p class="card-text">Description of project 2</p>
-                    </div>
-                </div>
-            </div>
-            <div class="col-md-4">
-                <div class="card custom-card mb-4">
-                    <div class="card-body">
-                        <h5 class="card-title">Project 3</h5>
-                        <p class="card-text">Description of project 3</p>
-                    </div>
-                </div>
-            </div>
-        </div>
-    </div>
-    
-    <footer class="mt-5 p-4 bg-dark text-white text-center">
-        <p>&copy; 2024 Student Portfolio</p>
-    </footer>
-    
-    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
-</body>
-</html>"""
-
-
-def create_criteria_config():
-    """Create criteria configuration for web development grading."""
-    return {
-        "base": {
-            "weight": 100,
-            "subjects": {
-                "HTML Structure": {
-                    "weight": 50,
-                    "subjects": {
-                        "Bootstrap Integration": {
-                            "weight": 40,
-                            "tests": [
-                                {
-                                    "file": "index.html",
-                                    "name": "check_bootstrap_linked"
-                                }
-                            ]
-                        },
-                        "Bootstrap Grid Classes": {
-                            "weight": 60,
-                            "tests": [
-                                {
-                                    "file": "index.html",
-                                    "name": "has_class",
-                                    "calls": [
-                                        [["col-*"], 3]
-                                    ]
-                                }
-                            ]
-                        }
-                    }
-                },
-                "Components": {
-                    "weight": 50,
-                    "subjects": {
-                        "Card Components": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "index.html",
-                                    "name": "has_class",
-                                    "calls": [
-                                        [["card", "card-body"], 6]
-                                    ]
-                                }
-                            ]
-                        },
-                        "Custom Styling": {
-                            "weight": 50,
-                            "tests": [
-                                {
-                                    "file": "index.html",
-                                    "name": "has_class",
-                                    "calls": [
-                                        [["custom-*"], 2]
-                                    ]
-                                }
-                            ]
-                        }
-                    }
-                }
-            }
-        },
-        "bonus": {
-            "weight": 20,
-            "subjects": {
-                "Best Practices": {
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "file": "index.html",
-                            "name": "check_no_inline_styles"
-                        }
-                    ]
-                }
-            }
-        },
-        "penalty": {
-            "weight": 10
-        }
-    }
-
-
-def create_feedback_config():
-    """Create feedback preferences for the grading."""
-    return {
-        "general": {
-            "report_title": "Relatório de Avaliação - Portfolio Web",
-            "show_score": True,
-            "show_passed_tests": False,
-            "add_report_summary": True
-        },
-        "ai": {
-            "provide_solutions": "hint",
-            "feedback_tone": "encouraging",
-            "feedback_persona": "Web Development Mentor",
-            "assignment_context": "Este é um projeto de portfolio web usando Bootstrap e HTML/CSS."
-        },
-        "default": {
-            "category_headers": {
-                "base": "✅ Requisitos Essenciais",
-                "bonus": "⭐ Pontos Extras",
-                "penalty": "❌ Pontos a Melhorar"
-            }
-        }
-    }
-
-
-def run_webdev_playroom():
-    """Execute the web development playroom."""
-    print("\n" + "="*70)
-    print("WEB DEVELOPMENT TEMPLATE PLAYROOM")
-    print("="*70 + "\n")
-
-    # Create submission files
-    print("📄 Creating HTML submission...")
-    submission_files = {
-        "index.html": create_html_submission()
-    }
-
-    # Create assignment configuration
-    print("⚙️  Setting up assignment configuration...")
-    assignment_config = AssignmentConfig(
-        template="webdev",
-        criteria=create_criteria_config(),
-        feedback=create_feedback_config(),
-        setup={}
-    )
-
-    # Create autograder request
-    print("📋 Building autograder request...")
-    request = AutograderRequest(
-        submission_files=submission_files,
-        assignment_config=assignment_config,
-        student_name="John Doe",
-        include_feedback=True,
-        feedback_mode="default"
-    )
-
-    # Execute grading
-    print("🚀 Starting grading process...\n")
-    print("-"*70)
-    result = Autograder.grade(request)
-    print("-"*70)
-
-    # Display results
-    print("\n" + "="*70)
-    print("GRADING RESULTS")
-    print("="*70)
-    print(f"\n✅ Status: {result.status}")
-    print(f"📊 Final Score: {result.final_score}/100")
-    print(f"\n📝 Feedback:\n{result.feedback}")
-    print(f"\n📈 Test Report:\n{result.test_report}")
-    print("\n" + "="*70 + "\n")
-
-
-if __name__ == "__main__":
-    run_webdev_playroom()
-
diff --git a/tests/test_pipeline_modes.py b/tests/test_pipeline_modes.py
deleted file mode 100644
index df88317..0000000
--- a/tests/test_pipeline_modes.py
+++ /dev/null
@@ -1,252 +0,0 @@
-"""
-Test the pipeline's ability to handle single vs multi-submission modes.
-
-This test verifies:
-1. Single submission mode: Grades directly from config (one-pass)
-2. Multi-submission mode: Builds tree once, grades multiple times
-"""
-
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from autograder.autograder import build_pipeline
-from autograder.models.dataclass.criteria_config import CriteriaConfig
-
-
-def create_simple_criteria():
-    """Create simple test criteria."""
-    return {
-        "base": {
-            "weight": 90,
-            "subjects": [
-                {
-                    "subject_name": "Basic Tests",
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "name": "always_pass",
-                            "parameters": {}
-                        },
-                        {
-                            "name": "check_value",
-                            "parameters": {
-                                "expected": 42
-                            }
-                        }
-                    ]
-                }
-            ]
-        },
-        "bonus": {
-            "weight": 10,
-            "tests": [
-                {
-                    "name": "always_pass",
-                    "parameters": {}
-                }
-            ]
-        }
-    }
-
-
-def create_mock_submission():
-    """Create mock submission files."""
-    return {
-        "main.py": "value = 42\n"
-    }
-
-
-def test_single_submission_mode():
-    """Test single submission mode (grade directly from config)."""
-    print("\n" + "="*80)
-    print("TEST: Single Submission Mode (Direct from Config)")
-    print("="*80)
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Build pipeline for single submission
-    pipeline = build_pipeline(
-        template_name="input_output",
-        include_feedback=False,
-        grading_criteria=criteria,
-        feedback_config=None,
-        setup_config=None,
-        custom_template=None,
-        feedback_mode=None,
-        submission_files=submission,
-        submission_id="test_001",
-        is_multi_submission=False  # Single submission mode
-    )
-
-    # Verify pipeline steps
-    print("\nPipeline Steps:")
-    for i, step in enumerate(pipeline._steps):
-        print(f"  {i+1}. {step.__class__.__name__}")
-
-    print("\nExpected flow:")
-    print("  - TemplateLoaderStep loads the template")
-    print("  - GradeStep grades directly from config (one-pass)")
-    print("  - ExporterStep exports results")
-
-    # Verify GradeStep has criteria_json for single submission mode
-    grade_step = None
-    for step in pipeline._steps:
-        if step.__class__.__name__ == "GradeStep":
-            grade_step = step
-            break
-
-    assert grade_step is not None, "GradeStep not found in pipeline"
-    assert grade_step._criteria_json is not None, "GradeStep should have criteria_json in single mode"
-    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
-
-    print("\n✓ Single submission mode configured correctly")
-    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
-    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
-
-
-def test_multi_submission_mode():
-    """Test multi-submission mode (build tree, then grade)."""
-    print("\n" + "="*80)
-    print("TEST: Multi-Submission Mode (Tree Building)")
-    print("="*80)
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Build pipeline for multiple submissions
-    pipeline = build_pipeline(
-        template_name="input_output",
-        include_feedback=False,
-        grading_criteria=criteria,
-        feedback_config=None,
-        setup_config=None,
-        custom_template=None,
-        feedback_mode=None,
-        submission_files=submission,
-        submission_id="test_002",
-        is_multi_submission=True  # Multi-submission mode
-    )
-
-    # Verify pipeline steps
-    print("\nPipeline Steps:")
-    for i, step in enumerate(pipeline._steps):
-        print(f"  {i+1}. {step.__class__.__name__}")
-
-    print("\nExpected flow:")
-    print("  - TemplateLoaderStep loads the template")
-    print("  - BuildTreeStep builds criteria tree (reusable)")
-    print("  - GradeStep grades from tree")
-    print("  - ExporterStep exports results")
-
-    # Verify BuildTreeStep and GradeStep are present
-    has_build_tree = False
-    grade_step = None
-
-    for step in pipeline._steps:
-        if step.__class__.__name__ == "BuildTreeStep":
-            has_build_tree = True
-        elif step.__class__.__name__ == "GradeStep":
-            grade_step = step
-
-    assert has_build_tree, "BuildTreeStep not found in pipeline for multi-submission mode"
-    assert grade_step is not None, "GradeStep not found in pipeline"
-    assert grade_step._criteria_json is None, "GradeStep should NOT have criteria_json in multi mode"
-    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
-
-    print("\n✓ Multi-submission mode configured correctly")
-    print(f"  - BuildTreeStep present: {has_build_tree}")
-    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
-    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
-
-
-def test_grade_step_input_detection():
-    """Test that GradeStep correctly detects input type."""
-    print("\n" + "="*80)
-    print("TEST: GradeStep Input Type Detection")
-    print("="*80)
-
-    from autograder.steps.grade_step import GradeStep
-    from autograder.models.abstract.template import Template
-    from autograder.models.criteria_tree import CriteriaTree, CategoryNode
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Test 1: GradeStep with Template input (single mode)
-    print("\n1. Testing with Template input (single submission mode):")
-    grade_step_single = GradeStep(
-        criteria_json=criteria,
-        submission_files=submission,
-        submission_id="test_single"
-    )
-
-    # Create a mock template
-    class MockTemplate(Template):
-        def __init__(self):
-            self.name = "mock_template"
-            self.tests = {}
-
-        def get_test(self, test_name):
-            # Return a mock test function
-            def mock_test(*args, **kwargs):
-                return {"passed": True, "score": 100}
-            return mock_test
-
-    mock_template = MockTemplate()
-
-    print("  - Input type: Template")
-    print("  - Expected behavior: Grade from config (one-pass)")
-    print("  ✓ GradeStep will use grade_from_config method")
-
-    # Test 2: GradeStep with CriteriaTree input (multi mode)
-    print("\n2. Testing with CriteriaTree input (multi-submission mode):")
-    grade_step_multi = GradeStep(
-        submission_files=submission,
-        submission_id="test_multi"
-    )
-
-    # Create a mock criteria tree
-    mock_tree = CriteriaTree(
-        base=CategoryNode(name="base", weight=100),
-        bonus=None,
-        penalty=None
-    )
-
-    print("  - Input type: CriteriaTree")
-    print("  - Expected behavior: Grade from tree (reusable)")
-    print("  ✓ GradeStep will use grade_from_tree method")
-
-
-if __name__ == "__main__":
-    print("\n" + "="*80)
-    print("PIPELINE MODE TESTS")
-    print("="*80)
-
-    try:
-        test_single_submission_mode()
-        test_multi_submission_mode()
-        test_grade_step_input_detection()
-
-        print("\n" + "="*80)
-        print("ALL TESTS PASSED ✓")
-        print("="*80)
-        print("\nSummary:")
-        print("  ✓ Single submission mode: Grades directly from config")
-        print("  ✓ Multi-submission mode: Builds tree once, grades multiple times")
-        print("  ✓ GradeStep correctly detects input type (Template vs CriteriaTree)")
-        print("  ✓ Pipeline configuration is flexible and optimized")
-
-    except AssertionError as e:
-        print(f"\n❌ TEST FAILED: {e}")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n❌ ERROR: {e}")
-        import traceback
-        traceback.print_exc()
-        sys.exit(1)
-
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
deleted file mode 100644
index 8cd5692..0000000
--- a/tests/unit/test_pipeline_steps.py
+++ /dev/null
@@ -1,328 +0,0 @@
-"""
-Unit tests for BuildTreeStep and GradeStep.
-
-These tests verify:
-1. BuildTreeStep correctly builds a CriteriaTree from config
-2. GradeStep intelligently handles both CriteriaTree and Template inputs
-3. Single vs multi-submission pipeline modes work correctly
-"""
-
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from autograder.steps.build_tree_step import BuildTreeStep
-from autograder.steps.grade_step import GradeStep
-from autograder.models.config.criteria import CriteriaConfig
-from autograder.models.dataclass.step_result import StepStatus
-from autograder.models.abstract.template import Template
-from autograder.models.abstract.test_function import TestFunction
-from autograder.models.dataclass.test_result import TestResult
-
-
-# Mock Template and TestFunction for testing
-class MockTestFunction(TestFunction):
-    """Mock test function that always passes."""
-
-    def __init__(self, test_name):
-        self._test_name = test_name
-
-    @property
-    def name(self):
-        return self._test_name
-
-    @property
-    def description(self):
-        return f"Mock test function: {self._test_name}"
-
-    def execute(self, *args, **kwargs):
-        """Always return a passing result."""
-        return TestResult(
-            test_name=self._test_name,
-            passed=True,
-            score=100.0,
-            max_score=100.0,
-            message="Test passed (mock)",
-        )
-
-
-class MockTemplate(Template):
-    """Mock template with pre-defined test functions."""
-
-    def __init__(self):
-        self.name = "mock_template"
-        self._tests = {
-            "expect_output": MockTestFunction("expect_output"),
-            "check_file": MockTestFunction("check_file"),
-            "validate_input": MockTestFunction("validate_input"),
-        }
-
-    @property
-    def template_name(self):
-        """Get template name."""
-        return "mock_template"
-
-    @property
-    def template_description(self):
-        """Get template description."""
-        return "Mock template for testing purposes"
-
-    @property
-    def requires_pre_executed_tree(self) -> bool:
-        """Mock templates don't require pre-executed trees."""
-        return False
-
-    @property
-    def requires_execution_helper(self) -> bool:
-        """Mock templates don't require execution helpers."""
-        return False
-
-    @property
-    def execution_helper(self):
-        """No execution helper needed for mocks."""
-        return None
-
-    def stop(self):
-        """No cleanup needed for mock templates."""
-        pass
-
-    def get_test(self, test_name: str):
-        """Get a test function by name."""
-        return self._tests.get(test_name)
-
-    def get_available_tests(self):
-        """Get list of available test names."""
-        return list(self._tests.keys())
-
-
-def create_simple_criteria():
-    """Create a simple criteria configuration for testing."""
-    return {
-        "test_library": "input_output",
-        "base": {
-            "weight": 100,
-            "subjects": [
-                {
-                    "subject_name": "Basic Tests",
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "name": "expect_output",
-                            "file": "main.py",
-                            "parameters": [
-                                {"name": "stdin_input", "value": ["hello"]},
-                                {"name": "expected_output", "value": "hello"},
-                            ],
-                        },
-                        {
-                            "name": "expect_output",
-                            "file": "main.py",
-                            "parameters": [
-                                {"name": "stdin_input", "value": ["world"]},
-                                {"name": "expected_output", "value": "world"},
-                            ],
-                        },
-                    ],
-                }
-            ],
-        },
-        "bonus": {
-            "weight": 10,
-            "tests": [
-                {
-                    "name": "expect_output",
-                    "file": "main.py",
-                    "parameters": [
-                        {"name": "stdin_input", "value": ["bonus"]},
-                        {"name": "expected_output", "value": "bonus"},
-                    ],
-                }
-            ],
-        },
-    }
-
-
-def create_mock_submission():
-    """Create mock submission files."""
-    return {"main.py": "# Simple echo program\nprint(input())"}
-
-
-def test_build_tree_step():
-    """Test that BuildTreeStep correctly builds a CriteriaTree."""
-    print("\n" + "=" * 80)
-    print("TEST: BuildTreeStep")
-    print("=" * 80)
-
-    # Create criteria and template
-    criteria = create_simple_criteria()
-    template = MockTemplate()
-
-    # Create and execute step
-    build_step = BuildTreeStep(criteria)
-    result = build_step.execute(template)
-
-    # Verify result
-    assert result.status == StepStatus.SUCCESS, f"Build step failed: {result.error}"
-    assert result.data is not None, "CriteriaTree is None"
-
-    criteria_tree = result.data
-
-    # Verify tree structure
-    assert criteria_tree.base is not None, "Base category missing"
-    assert criteria_tree.bonus is not None, "Bonus category missing"
-
-    print("✓ BuildTreeStep successfully built CriteriaTree")
-    print(f"  - Base category: {criteria_tree.base.name}")
-    print(f"  - Bonus category: {criteria_tree.bonus.name}")
-
-    # Print tree structure
-    print("\nCriteria Tree Structure:")
-    criteria_tree.print_tree()
-
-    return criteria_tree
-
-
-def test_grade_from_tree():
-    """Test that GradeStep can grade from a CriteriaTree."""
-    print("\n" + "=" * 80)
-    print("TEST: GradeStep with CriteriaTree (Multi-Submission Mode)")
-    print("=" * 80)
-
-    # Build criteria tree first
-    criteria = create_simple_criteria()
-    template = MockTemplate()
-    build_step = BuildTreeStep(criteria)
-    build_result = build_step.execute(template)
-
-    criteria_tree = build_result.data
-    submission_files = create_mock_submission()
-
-    # Create and execute grade step
-    grade_step = GradeStep(
-        submission_files=submission_files, submission_id="test_submission_1"
-    )
-
-    result = grade_step.execute(criteria_tree)
-
-    # Verify result
-    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
-    assert result.data is not None, "GradingResult is None"
-
-    grading_result = result.data
-
-    print("✓ GradeStep successfully graded from CriteriaTree")
-    print(f"  - Final Score: {grading_result.final_score}")
-    print(f"  - Status: {grading_result.status}")
-
-    # Print result tree
-    if grading_result.result_tree:
-        print("\nResult Tree:")
-        grading_result.result_tree.print_tree()
-
-    return grading_result
-
-
-def test_grade_from_config():
-    """Test that GradeStep can grade directly from config (single submission mode)."""
-    print("\n" + "=" * 80)
-    print("TEST: GradeStep with Template (Single Submission Mode)")
-    print("=" * 80)
-
-    # Create criteria and template
-    criteria = create_simple_criteria()
-    template = MockTemplate()
-    submission_files = create_mock_submission()
-
-    # Create and execute grade step (without building tree first)
-    grade_step = GradeStep(
-        criteria_json=criteria,
-        submission_files=submission_files,
-        submission_id="test_submission_2",
-    )
-
-    result = grade_step.execute(template)
-
-    # Verify result
-    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
-    assert result.data is not None, "GradingResult is None"
-
-    grading_result = result.data
-
-    print("✓ GradeStep successfully graded from config")
-    print(f"  - Final Score: {grading_result.final_score}")
-    print(f"  - Status: {grading_result.status}")
-
-    # Print result tree
-    if grading_result.result_tree:
-        print("\nResult Tree:")
-        grading_result.result_tree.print_tree()
-
-    return grading_result
-
-
-def test_invalid_input_type():
-    """Test that GradeStep rejects invalid input types."""
-    print("\n" + "=" * 80)
-    print("TEST: GradeStep with Invalid Input Type")
-    print("=" * 80)
-
-    submission_files = create_mock_submission()
-
-    grade_step = GradeStep(
-        submission_files=submission_files, submission_id="test_submission_3"
-    )
-
-    # Try to execute with invalid input (string)
-    result = grade_step.execute("invalid input")
-
-    # Verify it fails gracefully
-    assert result.status == StepStatus.FAIL, "Should fail with invalid input"
-    assert result.error is not None, "Should have error message"
-
-    print("✓ GradeStep correctly rejected invalid input")
-    print(f"  - Error: {result.error}")
-
-
-def run_all_tests():
-    """Run all unit tests."""
-    print("\n" + "#" * 80)
-    print("# RUNNING PIPELINE STEPS UNIT TESTS")
-    print("#" * 80)
-
-    try:
-        # Test 1: Build tree
-        criteria_tree = test_build_tree_step()
-
-        # Test 2: Grade from tree (multi-submission mode)
-        grading_result_tree = test_grade_from_tree()
-
-        # Test 3: Grade from config (single submission mode)
-        grading_result_config = test_grade_from_config()
-
-        # Test 4: Invalid input handling
-        test_invalid_input_type()
-
-        print("\n" + "#" * 80)
-        print("# ALL TESTS PASSED! ✓")
-        print("#" * 80)
-
-    except AssertionError as e:
-        print("\n" + "#" * 80)
-        print(f"# TEST FAILED: {e}")
-        print("#" * 80)
-        raise
-    except Exception as e:
-        print("\n" + "#" * 80)
-        print(f"# UNEXPECTED ERROR: {e}")
-        print("#" * 80)
-        import traceback
-
-        traceback.print_exc()
-        raise
-
-
-if __name__ == "__main__":
-    run_all_tests()

From 80467c809f79e5859aae1cc29d8c56bb944968f5 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 19 Jan 2026 21:26:58 -0300
Subject: [PATCH 36/49] Revert "refactor: delete current tests"

This reverts commit d0b30f8f75d21e1aee18b608b107e30963f721ce.
---
 tests/__init__.py                             |   0
 ...der_API_Collection.postman_collection.json | 329 +++++++++++
 tests/data/README.md                          | 510 ++++++++++++++++++
 tests/data/api_request_schema.json            | 266 +++++++++
 tests/data/api_testing/criteria.json          |  37 ++
 tests/data/api_testing/feedback.json          |  14 +
 tests/data/api_testing/package.json           |  12 +
 tests/data/api_testing/server.js              |  56 ++
 tests/data/api_testing/setup.json             |   8 +
 tests/data/curl_examples.sh                   | 246 +++++++++
 tests/data/custom_template/criteria.json      |   0
 tests/data/custom_template/custom_template.py | 116 ++++
 tests/data/custom_template/feedback.json      |  12 +
 tests/data/custom_template/main.py            |   0
 tests/data/essay/criteria.json                |   0
 tests/data/essay/essay.txt                    |   0
 tests/data/essay/feedback.json                |   0
 tests/data/input_output/calculator.py         |  46 ++
 tests/data/input_output/criteria.json         |   0
 tests/data/input_output/feedback.json         |  14 +
 tests/data/input_output/requirements.txt      |   0
 tests/data/input_output/setup.json            |   0
 tests/data/web_dev/criteria.json              |  35 ++
 tests/data/web_dev/feedback.json              |  15 +
 tests/data/web_dev/index.html                 |  39 ++
 tests/data/web_dev/script.js                  |  33 ++
 tests/data/web_dev/style.css                  |   0
 tests/playroom.py                             |  46 ++
 tests/playroom/README.md                      | 286 ++++++++++
 tests/playroom/__init__.py                    |  10 +
 tests/playroom/api_playroom.py                | 250 +++++++++
 tests/playroom/essay_playroom.py              | 287 ++++++++++
 tests/playroom/io_playroom.py                 | 226 ++++++++
 tests/playroom/run_all_playrooms.py           |   0
 tests/playroom/webdev_playroom.py             | 241 +++++++++
 tests/test_pipeline_modes.py                  | 252 +++++++++
 tests/unit/__init__.py                        |   0
 tests/unit/test_pipeline_steps.py             | 328 +++++++++++
 38 files changed, 3714 insertions(+)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/data/Autograder_API_Collection.postman_collection.json
 create mode 100644 tests/data/README.md
 create mode 100644 tests/data/api_request_schema.json
 create mode 100644 tests/data/api_testing/criteria.json
 create mode 100644 tests/data/api_testing/feedback.json
 create mode 100644 tests/data/api_testing/package.json
 create mode 100644 tests/data/api_testing/server.js
 create mode 100644 tests/data/api_testing/setup.json
 create mode 100644 tests/data/curl_examples.sh
 create mode 100644 tests/data/custom_template/criteria.json
 create mode 100644 tests/data/custom_template/custom_template.py
 create mode 100644 tests/data/custom_template/feedback.json
 create mode 100644 tests/data/custom_template/main.py
 create mode 100644 tests/data/essay/criteria.json
 create mode 100644 tests/data/essay/essay.txt
 create mode 100644 tests/data/essay/feedback.json
 create mode 100644 tests/data/input_output/calculator.py
 create mode 100644 tests/data/input_output/criteria.json
 create mode 100644 tests/data/input_output/feedback.json
 create mode 100644 tests/data/input_output/requirements.txt
 create mode 100644 tests/data/input_output/setup.json
 create mode 100644 tests/data/web_dev/criteria.json
 create mode 100644 tests/data/web_dev/feedback.json
 create mode 100644 tests/data/web_dev/index.html
 create mode 100644 tests/data/web_dev/script.js
 create mode 100644 tests/data/web_dev/style.css
 create mode 100644 tests/playroom.py
 create mode 100644 tests/playroom/README.md
 create mode 100644 tests/playroom/__init__.py
 create mode 100644 tests/playroom/api_playroom.py
 create mode 100644 tests/playroom/essay_playroom.py
 create mode 100644 tests/playroom/io_playroom.py
 create mode 100644 tests/playroom/run_all_playrooms.py
 create mode 100644 tests/playroom/webdev_playroom.py
 create mode 100644 tests/test_pipeline_modes.py
 create mode 100644 tests/unit/__init__.py
 create mode 100644 tests/unit/test_pipeline_steps.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/Autograder_API_Collection.postman_collection.json b/tests/data/Autograder_API_Collection.postman_collection.json
new file mode 100644
index 0000000..8336d7f
--- /dev/null
+++ b/tests/data/Autograder_API_Collection.postman_collection.json
@@ -0,0 +1,329 @@
+{
+  "info": {
+    "name": "Autograder API Collection",
+    "description": "Complete API collection for testing the Autograder API with various templates and scenarios",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "variable": [
+    {
+      "key": "base_url",
+      "value": "http://localhost:8001",
+      "type": "string"
+    }
+  ],
+  "item": [
+    {
+      "name": "Grade Submission - Web Dev",
+      "request": {
+        "method": "POST",
+        "header": [],
+        "body": {
+          "mode": "formdata",
+          "formdata": [
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/web_dev/index.html"
+            },
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/web_dev/style.css"
+            },
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/web_dev/script.js"
+            },
+            {
+              "key": "criteria_json",
+              "type": "file",
+              "src": "tests/data/web_dev/criteria.json"
+            },
+            {
+              "key": "feedback_json",
+              "type": "file",
+              "src": "tests/data/web_dev/feedback.json"
+            },
+            {
+              "key": "template_preset",
+              "value": "web dev",
+              "type": "text"
+            },
+            {
+              "key": "student_name",
+              "value": "John Doe",
+              "type": "text"
+            },
+            {
+              "key": "student_credentials",
+              "value": "test-token-123",
+              "type": "text"
+            },
+            {
+              "key": "include_feedback",
+              "value": "true",
+              "type": "text"
+            },
+            {
+              "key": "feedback_type",
+              "value": "default",
+              "type": "text"
+            }
+          ]
+        },
+        "url": {
+          "raw": "{{base_url}}/grade_submission/",
+          "host": ["{{base_url}}"],
+          "path": ["grade_submission", ""]
+        },
+        "description": "Test web development template with HTML, CSS, and JavaScript files"
+      },
+      "response": []
+    },
+    {
+      "name": "Grade Submission - API Testing",
+      "request": {
+        "method": "POST",
+        "header": [],
+        "body": {
+          "mode": "formdata",
+          "formdata": [
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/api_testing/server.js"
+            },
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/api_testing/package.json"
+            },
+            {
+              "key": "criteria_json",
+              "type": "file",
+              "src": "tests/data/api_testing/criteria.json"
+            },
+            {
+              "key": "feedback_json",
+              "type": "file",
+              "src": "tests/data/api_testing/feedback.json"
+            },
+            {
+              "key": "setup_json",
+              "type": "file",
+              "src": "tests/data/api_testing/setup.json"
+            },
+            {
+              "key": "template_preset",
+              "value": "api",
+              "type": "text"
+            },
+            {
+              "key": "student_name",
+              "value": "Jane Smith",
+              "type": "text"
+            },
+            {
+              "key": "student_credentials",
+              "value": "test-token-456",
+              "type": "text"
+            },
+            {
+              "key": "include_feedback",
+              "value": "true",
+              "type": "text"
+            },
+            {
+              "key": "feedback_type",
+              "value": "default",
+              "type": "text"
+            }
+          ]
+        },
+        "url": {
+          "raw": "{{base_url}}/grade_submission/",
+          "host": ["{{base_url}}"],
+          "path": ["grade_submission", ""]
+        },
+        "description": "Test API testing template with Node.js Express server"
+      },
+      "response": []
+    },
+    {
+      "name": "Grade Submission - Input/Output",
+      "request": {
+        "method": "POST",
+        "header": [],
+        "body": {
+          "mode": "formdata",
+          "formdata": [
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/input_output/calculator.py"
+            },
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/input_output/requirements.txt"
+            },
+            {
+              "key": "criteria_json",
+              "type": "file",
+              "src": "tests/data/input_output/criteria.json"
+            },
+            {
+              "key": "feedback_json",
+              "type": "file",
+              "src": "tests/data/input_output/feedback.json"
+            },
+            {
+              "key": "setup_json",
+              "type": "file",
+              "src": "tests/data/input_output/setup.json"
+            },
+            {
+              "key": "template_preset",
+              "value": "io",
+              "type": "text"
+            },
+            {
+              "key": "student_name",
+              "value": "Bob Johnson",
+              "type": "text"
+            },
+            {
+              "key": "student_credentials",
+              "value": "test-token-789",
+              "type": "text"
+            },
+            {
+              "key": "include_feedback",
+              "value": "true",
+              "type": "text"
+            },
+            {
+              "key": "feedback_type",
+              "value": "default",
+              "type": "text"
+            }
+          ]
+        },
+        "url": {
+          "raw": "{{base_url}}/grade_submission/",
+          "host": ["{{base_url}}"],
+          "path": ["grade_submission", ""]
+        },
+        "description": "Test input/output template with Python calculator"
+      },
+      "response": []
+    },
+    {
+      "name": "Grade Submission - Custom Template",
+      "request": {
+        "method": "POST",
+        "header": [],
+        "body": {
+          "mode": "formdata",
+          "formdata": [
+            {
+              "key": "submission_files",
+              "type": "file",
+              "src": "tests/data/custom_template/main.py"
+            },
+            {
+              "key": "criteria_json",
+              "type": "file",
+              "src": "tests/data/custom_template/criteria.json"
+            },
+            {
+              "key": "feedback_json",
+              "type": "file",
+              "src": "tests/data/custom_template/feedback.json"
+            },
+            {
+              "key": "custom_template",
+              "type": "file",
+              "src": "tests/data/custom_template/custom_template.py"
+            },
+            {
+              "key": "template_preset",
+              "value": "custom",
+              "type": "text"
+            },
+            {
+              "key": "student_name",
+              "value": "Alice Williams",
+              "type": "text"
+            },
+            {
+              "key": "student_credentials",
+              "value": "test-token-101",
+              "type": "text"
+            },
+            {
+              "key": "include_feedback",
+              "value": "true",
+              "type": "text"
+            },
+            {
+              "key": "feedback_type",
+              "value": "default",
+              "type": "text"
+            }
+          ]
+        },
+        "url": {
+          "raw": "{{base_url}}/grade_submission/",
+          "host": ["{{base_url}}"],
+          "path": ["grade_submission", ""]
+        },
+        "description": "Test custom template with custom grading logic"
+      },
+      "response": []
+    },
+    {
+      "name": "Get Template Info - Web Dev",
+      "request": {
+        "method": "GET",
+        "header": [],
+        "url": {
+          "raw": "{{base_url}}/templates/webdev",
+          "host": ["{{base_url}}"],
+          "path": ["template", "web_dev"]
+        },
+        "description": "Get information about the web development template"
+      },
+      "response": []
+    },
+    {
+      "name": "Get Template Info - API",
+      "request": {
+        "method": "GET",
+        "header": [],
+        "url": {
+          "raw": "{{base_url}}/templates/api",
+          "host": ["{{base_url}}"],
+          "path": ["template", "api"]
+        },
+        "description": "Get information about the API testing template"
+      },
+      "response": []
+    },
+    {
+      "name": "Get Template Info - Input/Output",
+      "request": {
+        "method": "GET",
+        "header": [],
+        "url": {
+          "raw": "{{base_url}}/templates/io",
+          "host": ["{{base_url}}"],
+          "path": ["template", "io"]
+        },
+        "description": "Get information about the input/output template"
+      },
+      "response": []
+    }
+  ]
+}
diff --git a/tests/data/README.md b/tests/data/README.md
new file mode 100644
index 0000000..df71884
--- /dev/null
+++ b/tests/data/README.md
@@ -0,0 +1,510 @@
+# Autograder API Test Suite
+
+This directory contains comprehensive test data and a testing script for the Autograder API. The test suite covers all supported template types and provides realistic submission scenarios.
+
+## 📁 Directory Structure
+
+```
+tests/data/
+├── web_dev/              # Web Development template test data
+│   ├── index.html        # Student HTML submission
+│   ├── style.css         # Student CSS submission
+│   ├── script.js         # Student JavaScript submission
+│   ├── criteria.json     # Grading criteria configuration
+│   └── feedback.json     # Feedback configuration
+│
+├── api_testing/          # API Testing template test data
+│   ├── server.js         # Student Node.js API server
+│   ├── package.json      # NPM dependencies
+│   ├── criteria.json     # API testing criteria
+│   ├── feedback.json     # Feedback configuration
+│   └── setup.json        # Container setup (runtime, commands)
+│
+├── input_output/         # Input/Output template test data
+│   ├── calculator.py     # Student Python program
+│   ├── requirements.txt  # Python dependencies
+│   ├── criteria.json     # I/O testing criteria
+│   ├── feedback.json     # Feedback configuration
+│   └── setup.json        # Container setup
+│
+├── essay/                # Essay template test data
+│   ├── essay.txt         # Student essay text
+│   ├── criteria.json     # Essay grading criteria
+│   └── feedback.json     # Feedback configuration
+│
+└── custom_template/      # Custom Template test data
+    ├── main.py           # Student Python submission
+    ├── custom_template.py # Custom grading template
+    ├── criteria.json     # Custom criteria
+    └── feedback.json     # Feedback configuration
+```
+
+## 🚀 Quick Start
+
+### Prerequisites
+
+```bash
+# Install required Python package
+pip install requests
+```
+
+### Running Tests
+
+**Interactive Menu Mode:**
+```bash
+python test_api_requests.py
+```
+
+**Direct Test Execution:**
+```bash
+# Test specific template
+python test_api_requests.py --test web
+python test_api_requests.py --test api
+python test_api_requests.py --test io
+python test_api_requests.py --test essay
+python test_api_requests.py --test custom
+
+# Run all tests
+python test_api_requests.py --test all
+```
+
+**Custom API URL:**
+```bash
+python test_api_requests.py --url http://api.example.com:8000
+```
+
+## 📋 API Endpoints
+
+### 1. Grade Submission (POST)
+
+**Endpoint:** `/grade_submission/`
+
+**Request Format:**
+- Method: `POST`
+- Content-Type: `multipart/form-data`
+
+**Form Fields:**
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `submission_files` | File[] | ✅ | Student's source code files |
+| `template_preset` | String | ✅ | Template type: "web dev", "api", "io", "essay", "custom" |
+| `student_name` | String | ✅ | Student's name |
+| `student_credentials` | String | ✅ | GitHub token or credentials |
+| `include_feedback` | Boolean | ✅ | Whether to include detailed feedback |
+| `criteria_json` | File | ✅ | JSON file with grading criteria |
+| `feedback_type` | String | ⚠️ | "default" or "ai" (default: "default") |
+| `feedback_json` | File | ⚠️ | JSON file with feedback configuration |
+| `setup_json` | File | ⚠️ | JSON file for container setup (required for api/io) |
+| `custom_template` | File | ⚠️ | Python file with custom template (required for "custom") |
+| `openai_key` | String | ⚠️ | OpenAI API key (for AI feedback) |
+| `redis_url` | String | ⚠️ | Redis URL (for AI feedback caching) |
+| `redis_token` | String | ⚠️ | Redis token |
+
+**Response Format:**
+```json
+{
+  "server_status": "Server connection happened successfully",
+  "autograding_status": "completed",
+  "final_score": 85.5,
+  "feedback": "...",
+  "test_report": [
+    {
+      "name": "has_tag",
+      "score": 100,
+      "report": "Found 5 of 5 required div tags",
+      "parameters": {"tag": "div", "required_count": 5}
+    }
+  ]
+}
+```
+
+### 2. Get Template Info (GET)
+
+**Endpoint:** `/template/{template_name}`
+
+**Example:**
+```bash
+GET /templates/webdev
+GET /templates/api
+GET /templates/io
+GET /templates/essay
+```
+
+**Response:** Returns template metadata including available tests and their parameters.
+
+## 📦 Payload Examples
+
+### 1. Web Development Template
+
+**Template:** `web dev`  
+**Files:** HTML, CSS, JavaScript  
+**No Setup Required:** Tests run directly on static files
+
+**Payload Structure:**
+```python
+files = [
+    ('submission_files', ('index.html', html_content, 'text/plain')),
+    ('submission_files', ('style.css', css_content, 'text/plain')),
+    ('submission_files', ('script.js', js_content, 'text/plain')),
+    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
+    ('feedback_json', ('feedback.json', feedback_content, 'application/json'))
+]
+
+data = {
+    'template_preset': 'web dev',
+    'student_name': 'John Doe',
+    'student_credentials': 'token-123',
+    'include_feedback': 'true',
+    'feedback_type': 'default'
+}
+```
+
+**Criteria Example:**
+```json
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "html_structure": {
+        "weight": 40,
+        "tests": [
+          {
+            "name": "has_tag",
+            "file": "index.html",
+            "calls": [
+              ["div", 5],
+              ["h1", 2],
+              ["p", 3]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+### 2. API Testing Template
+
+**Template:** `api`  
+**Files:** server.js, package.json  
+**Requires:** setup.json with Docker configuration
+
+**Payload Structure:**
+```python
+files = [
+    ('submission_files', ('server.js', server_content, 'text/plain')),
+    ('submission_files', ('package.json', package_content, 'text/plain')),
+    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
+    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
+    ('setup_json', ('setup.json', setup_content, 'application/json'))
+]
+
+data = {
+    'template_preset': 'api',
+    'student_name': 'Jane Smith',
+    'student_credentials': 'token-456',
+    'include_feedback': 'true',
+    'feedback_type': 'default'
+}
+```
+
+**Setup Example:**
+```json
+{
+  "runtime_image": "node:18-alpine",
+  "container_port": 8000,
+  "start_command": "node server.js",
+  "commands": {
+    "install_dependencies": "npm install"
+  }
+}
+```
+
+**Criteria Example:**
+```json
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "api_endpoints": {
+        "weight": 100,
+        "tests": [
+          {
+            "name": "health_check",
+            "calls": [["/health"]]
+          },
+          {
+            "name": "check_response_json",
+            "calls": [
+              ["/api/user/1", "id", 1],
+              ["/api/user/1", "name", "John Doe"]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+### 3. Input/Output Template
+
+**Template:** `io`  
+**Files:** Python script, requirements.txt  
+**Requires:** setup.json with Docker configuration
+
+**Payload Structure:**
+```python
+files = [
+    ('submission_files', ('calculator.py', program_content, 'text/plain')),
+    ('submission_files', ('requirements.txt', requirements, 'text/plain')),
+    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
+    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
+    ('setup_json', ('setup.json', setup_content, 'application/json'))
+]
+
+data = {
+    'template_preset': 'io',
+    'student_name': 'Bob Johnson',
+    'student_credentials': 'token-789',
+    'include_feedback': 'true',
+    'feedback_type': 'default'
+}
+```
+
+**Criteria Example:**
+```json
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "basic_operations": {
+        "weight": 100,
+        "tests": [
+          {
+            "name": "expect_output",
+            "calls": [
+              [["add", "5", "3"], "8"],
+              [["subtract", "10", "4"], "6"]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+### 4. Essay Template
+
+**Template:** `essay`  
+**Files:** Plain text essay  
+**No Setup Required:** Graded based on text content
+
+**Payload Structure:**
+```python
+files = [
+    ('submission_files', ('essay.txt', essay_content, 'text/plain')),
+    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
+    ('feedback_json', ('feedback.json', feedback_content, 'application/json'))
+]
+
+data = {
+    'template_preset': 'essay',
+    'student_name': 'Chris Lee',
+    'student_credentials': 'token-202',
+    'include_feedback': 'true',
+    'feedback_type': 'default'
+}
+```
+
+**Criteria Example:**
+```json
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "content_quality": {
+        "weight": 70,
+        "tests": [
+          {
+            "name": "check_keyword",
+            "file": "essay.txt",
+            "calls": [
+              ["introduction", 1],
+              ["conclusion", 1]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+### 5. Custom Template
+
+**Template:** `custom`  
+**Files:** Student submission + custom_template.py  
+**Requires:** Custom Python template file
+
+**Payload Structure:**
+```python
+files = [
+    ('submission_files', ('main.py', student_code, 'text/plain')),
+    ('criteria_json', ('criteria.json', criteria_content, 'application/json')),
+    ('feedback_json', ('feedback.json', feedback_content, 'application/json')),
+    ('custom_template', ('template.py', template_code, 'text/plain'))
+]
+
+data = {
+    'template_preset': 'custom',
+    'student_name': 'Alice Williams',
+    'student_credentials': 'token-101',
+    'include_feedback': 'true',
+    'feedback_type': 'default'
+}
+```
+
+## 🧪 Test Scenarios
+
+### Scenario 1: Web Development Portfolio
+Tests HTML structure, CSS styling, and JavaScript functionality for a student portfolio website.
+
+**Expected Results:**
+- ✅ HTML semantic tags detected
+- ✅ CSS classes and properties validated
+- ✅ JavaScript event listeners found
+- ✅ No console errors
+
+### Scenario 2: REST API Server
+Tests a Node.js Express API with multiple endpoints and JSON responses.
+
+**Expected Results:**
+- ✅ Health check endpoint responds
+- ✅ User data endpoints return correct JSON
+- ✅ POST requests create resources
+
+### Scenario 3: Python Calculator
+Tests a command-line calculator program with various mathematical operations.
+
+**Expected Results:**
+- ✅ Addition operation works correctly
+- ✅ Subtraction operation works correctly
+- ✅ Edge cases handled (division by zero, etc.)
+
+### Scenario 4: Essay Evaluation
+Evaluates a student's essay based on content quality, keyword presence, and structure.
+
+**Expected Results:**
+- ✅ Introduction and conclusion paragraphs present
+- ✅ Required keywords found
+- ✅ No spelling or grammar errors
+
+### Scenario 5: Custom Template
+Tests a custom grading template that checks for file existence and function definitions.
+
+**Expected Results:**
+- ✅ Required files present
+- ✅ Required functions defined
+
+## 🔧 Troubleshooting
+
+### Connection Errors
+```
+❌ ERROR: Could not connect to API at http://localhost:8001
+```
+**Solution:** Ensure the API server is running:
+```bash
+cd autograder/connectors/adapters/api
+python api_entrypoint.py
+```
+
+### Missing Test Data
+```
+FileNotFoundError: Test directory not found
+```
+**Solution:** Ensure you're running the script from the project root:
+```bash
+cd /path/to/autograder
+python test_api_requests.py
+```
+
+### Timeout Errors
+```
+❌ ERROR: Request timed out
+```
+**Solution:** 
+- Increase timeout in the script (default: 120 seconds)
+- Check if Docker containers are running properly
+- Verify network connectivity
+
+## 📊 Understanding Results
+
+### Score Interpretation
+- **100**: Perfect score - all tests passed
+- **0-99**: Partial score - some tests passed
+- **0**: Failed - no tests passed
+
+### Test Report Format
+Each test in the report includes:
+- `name`: Test function name
+- `score`: Score out of 100
+- `report`: Human-readable description
+- `parameters`: Test parameters used
+
+### Feedback Types
+- **default**: Standard feedback based on test results
+- **ai**: AI-generated feedback (requires OpenAI API key)
+
+## 🚀 AWS Lambda Deployment
+
+For deploying to AWS Lambda, the payload structure remains the same. However:
+
+1. **Base64 Encoding**: File contents must be base64 encoded
+2. **API Gateway**: Use multipart/form-data or JSON with base64 strings
+3. **Timeout**: Set Lambda timeout to at least 5 minutes for complex tests
+4. **Memory**: Allocate at least 2GB RAM for Docker operations
+
+**Example Lambda Payload:**
+```json
+{
+  "template_preset": "web dev",
+  "student_name": "John Doe",
+  "student_credentials": "token-123",
+  "include_feedback": true,
+  "submission_files": [
+    {
+      "filename": "index.html",
+      "content": "base64_encoded_content_here"
+    }
+  ],
+  "criteria": { /* criteria JSON */ },
+  "feedback": { /* feedback JSON */ }
+}
+```
+
+## 📝 Notes
+
+- All test data is realistic and follows best practices
+- Tests are designed to pass with provided submissions
+- Modify criteria.json to test different scenarios
+- Use setup.json for templates requiring runtime environments
+- Custom templates must inherit from the Template base class
+
+## 🤝 Contributing
+
+To add new test scenarios:
+
+1. Create a new directory under `tests/data/`
+2. Add submission files and configuration JSONs
+3. Update `test_api_requests.py` with a new test method
+4. Add the test to the interactive menu
+
+## 📚 Additional Resources
+
+- [API Documentation](../docs/api_reference.md)
+- [Template Guide](../docs/creating_assignments.md)
+- [Configuration Rules](../docs/CONFIGURATION_RULES.md)
diff --git a/tests/data/api_request_schema.json b/tests/data/api_request_schema.json
new file mode 100644
index 0000000..d5ef75b
--- /dev/null
+++ b/tests/data/api_request_schema.json
@@ -0,0 +1,266 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Autograder API Request Schema",
+  "description": "JSON Schema for validating Autograder API requests",
+  "type": "object",
+  "properties": {
+    "template_preset": {
+      "type": "string",
+      "enum": ["web dev", "api", "io", "custom", "essay"],
+      "description": "The grading template to use"
+    },
+    "student_name": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Name or identifier of the student"
+    },
+    "student_credentials": {
+      "type": "string",
+      "description": "Authentication token or credentials"
+    },
+    "include_feedback": {
+      "type": "boolean",
+      "description": "Whether to include detailed feedback in the response"
+    },
+    "feedback_type": {
+      "type": "string",
+      "enum": ["default", "ai"],
+      "default": "default",
+      "description": "Type of feedback generation"
+    },
+    "openai_key": {
+      "type": "string",
+      "description": "OpenAI API key (required if feedback_type is 'ai')"
+    },
+    "redis_url": {
+      "type": "string",
+      "format": "uri",
+      "description": "Redis connection URL for caching"
+    },
+    "redis_token": {
+      "type": "string",
+      "description": "Redis authentication token"
+    }
+  },
+  "required": [
+    "template_preset",
+    "student_name",
+    "student_credentials",
+    "include_feedback"
+  ],
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "feedback_type": {"const": "ai"}
+        }
+      },
+      "then": {
+        "required": ["openai_key"]
+      }
+    }
+  ],
+  "definitions": {
+    "criteria_schema": {
+      "type": "object",
+      "properties": {
+        "base": {
+          "type": "object",
+          "properties": {
+            "weight": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 100
+            },
+            "subjects": {
+              "type": "object",
+              "patternProperties": {
+                "^[a-zA-Z_][a-zA-Z0-9_]*$": {
+                  "type": "object",
+                  "properties": {
+                    "weight": {
+                      "type": "number",
+                      "minimum": 0,
+                      "maximum": 100
+                    },
+                    "tests": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "name": {
+                            "type": "string",
+                            "description": "Test function name"
+                          },
+                          "file": {
+                            "type": "string",
+                            "description": "Target file for the test"
+                          },
+                          "calls": {
+                            "type": "array",
+                            "items": {
+                              "type": "array",
+                              "description": "Array of parameters for each test call"
+                            }
+                          }
+                        },
+                        "required": ["name", "calls"]
+                      }
+                    }
+                  },
+                  "required": ["weight", "tests"]
+                }
+              }
+            }
+          },
+          "required": ["weight", "subjects"]
+        }
+      },
+      "required": ["base"]
+    },
+    "feedback_schema": {
+      "type": "object",
+      "properties": {
+        "general": {
+          "type": "object",
+          "properties": {
+            "report_title": {
+              "type": "string",
+              "description": "Title for the feedback report"
+            },
+            "show_passed_tests": {
+              "type": "boolean",
+              "description": "Whether to show passed tests in the report"
+            },
+            "show_test_details": {
+              "type": "boolean",
+              "description": "Whether to show detailed test information"
+            }
+          }
+        },
+        "default": {
+          "type": "object",
+          "properties": {
+            "category_headers": {
+              "type": "object",
+              "patternProperties": {
+                "^[a-zA-Z_][a-zA-Z0-9_]*$": {
+                  "type": "string",
+                  "description": "Custom header for a category"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "setup_schema": {
+      "type": "object",
+      "properties": {
+        "runtime_image": {
+          "type": "string",
+          "description": "Docker image to use for execution",
+          "examples": ["node:18-alpine", "python:3.11-slim"]
+        },
+        "container_port": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 65535,
+          "description": "Port number inside the container"
+        },
+        "start_command": {
+          "type": "string",
+          "description": "Command to start the application",
+          "examples": ["node server.js", "python app.py"]
+        },
+        "commands": {
+          "type": "object",
+          "properties": {
+            "install_dependencies": {
+              "type": "string",
+              "description": "Command to install dependencies",
+              "examples": ["npm install", "pip install -r requirements.txt"]
+            }
+          }
+        }
+      },
+      "required": ["runtime_image", "start_command"]
+    },
+    "response_schema": {
+      "type": "object",
+      "properties": {
+        "server_status": {
+          "type": "string",
+          "description": "Status of the server connection"
+        },
+        "autograding_status": {
+          "type": "string",
+          "enum": ["completed", "failed", "partial"],
+          "description": "Overall status of the autograding process"
+        },
+        "final_score": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 100,
+          "description": "Final calculated score"
+        },
+        "feedback": {
+          "type": "string",
+          "description": "Generated feedback text"
+        },
+        "test_report": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string",
+                "description": "Test function name"
+              },
+              "score": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 100,
+                "description": "Score for this test"
+              },
+              "report": {
+                "type": "string",
+                "description": "Detailed test report"
+              },
+              "parameters": {
+                "type": "object",
+                "description": "Parameters used for the test"
+              }
+            },
+            "required": ["name", "score", "report"]
+          }
+        }
+      },
+      "required": ["server_status", "autograding_status", "final_score", "test_report"]
+    }
+  },
+  "examples": [
+    {
+      "template_preset": "web dev",
+      "student_name": "John Doe",
+      "student_credentials": "token-123",
+      "include_feedback": true,
+      "feedback_type": "default"
+    },
+    {
+      "template_preset": "api",
+      "student_name": "Jane Smith",
+      "student_credentials": "token-456",
+      "include_feedback": true,
+      "feedback_type": "default"
+    },
+    {
+      "template_preset": "custom",
+      "student_name": "Alice Williams",
+      "student_credentials": "token-789",
+      "include_feedback": true,
+      "feedback_type": "ai",
+      "openai_key": "sk-..."
+    }
+  ]
+}
diff --git a/tests/data/api_testing/criteria.json b/tests/data/api_testing/criteria.json
new file mode 100644
index 0000000..6ec1e78
--- /dev/null
+++ b/tests/data/api_testing/criteria.json
@@ -0,0 +1,37 @@
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "api_endpoints": {
+        "weight": 50,
+        "tests": [
+          {
+            "name": "health_check",
+            "calls": [
+              ["/health"]
+            ]
+          },
+          {
+            "name": "check_response_json",
+            "calls": [
+              ["/api/users", "users", []],
+              ["/api/user/1", "id", 1],
+              ["/api/user/1", "name", "John Doe"]
+            ]
+          }
+        ]
+      },
+      "api_methods": {
+        "weight": 50,
+        "tests": [
+          {
+            "name": "check_post_request",
+            "calls": [
+              ["/api/users", {"name": "Jane Smith", "email": "jane@example.com"}, 201]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
diff --git a/tests/data/api_testing/feedback.json b/tests/data/api_testing/feedback.json
new file mode 100644
index 0000000..d9921dd
--- /dev/null
+++ b/tests/data/api_testing/feedback.json
@@ -0,0 +1,14 @@
+{
+  "general": {
+    "report_title": "API Testing Assignment Feedback",
+    "show_passed_tests": true,
+    "show_test_details": true
+  },
+  "default": {
+    "category_headers": {
+      "base": "API Testing Requirements",
+      "api_endpoints": "API Endpoints",
+      "api_methods": "HTTP Methods"
+    }
+  }
+}
diff --git a/tests/data/api_testing/package.json b/tests/data/api_testing/package.json
new file mode 100644
index 0000000..62900cf
--- /dev/null
+++ b/tests/data/api_testing/package.json
@@ -0,0 +1,12 @@
+{
+  "name": "student-api",
+  "version": "1.0.0",
+  "description": "Student API assignment",
+  "main": "server.js",
+  "scripts": {
+    "start": "node server.js"
+  },
+  "dependencies": {
+    "express": "^4.18.2"
+  }
+}
diff --git a/tests/data/api_testing/server.js b/tests/data/api_testing/server.js
new file mode 100644
index 0000000..e558f06
--- /dev/null
+++ b/tests/data/api_testing/server.js
@@ -0,0 +1,56 @@
+const express = require('express');
+const app = express();
+const port = process.env.PORT || 8000;
+
+app.use(express.json());
+
+// In-memory database
+let users = [
+    { id: 1, name: 'John Doe', email: 'john@example.com' },
+    { id: 2, name: 'Jane Smith', email: 'jane@example.com' }
+];
+
+// Health check endpoint
+app.get('/health', (req, res) => {
+    res.status(200).json({ status: 'ok', message: 'API is running' });
+});
+
+// Get all users
+app.get('/api/users', (req, res) => {
+    res.json({ users: users });
+});
+
+// Get single user
+app.get('/api/user/:id', (req, res) => {
+    const userId = parseInt(req.params.id);
+    const user = users.find(u => u.id === userId);
+    
+    if (user) {
+        res.json(user);
+    } else {
+        res.status(404).json({ error: 'User not found' });
+    }
+});
+
+// Create new user
+app.post('/api/users', (req, res) => {
+    const { name, email } = req.body;
+    
+    if (!name || !email) {
+        return res.status(400).json({ error: 'Name and email are required' });
+    }
+    
+    const newUser = {
+        id: users.length + 1,
+        name,
+        email
+    };
+    
+    users.push(newUser);
+    res.status(201).json(newUser);
+});
+
+// Listen on 0.0.0.0 to accept external connections
+app.listen(port, '0.0.0.0', () => {
+    console.log(`Server is running on port ${port}`);
+});
diff --git a/tests/data/api_testing/setup.json b/tests/data/api_testing/setup.json
new file mode 100644
index 0000000..c2993d3
--- /dev/null
+++ b/tests/data/api_testing/setup.json
@@ -0,0 +1,8 @@
+{
+  "runtime_image": "node:18-alpine",
+  "container_port": 8000,
+  "start_command": "node server.js",
+  "commands": {
+    "install_dependencies": "npm install"
+  }
+}
diff --git a/tests/data/curl_examples.sh b/tests/data/curl_examples.sh
new file mode 100644
index 0000000..55e0dbc
--- /dev/null
+++ b/tests/data/curl_examples.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# Autograder API Test Examples using cURL
+# ========================================
+# This script contains cURL commands to test the Autograder API
+
+# Set the base URL (change this to your API endpoint)
+BASE_URL="http://localhost:8000"
+
+# Colors for output
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+# Function to print headers
+print_header() {
+    echo -e "\n${BLUE}========================================${NC}"
+    echo -e "${BLUE}$1${NC}"
+    echo -e "${BLUE}========================================${NC}\n"
+}
+
+# Change to the tests/data directory
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$SCRIPT_DIR"
+
+# ========================================
+# Test 1: Web Development Template
+# ========================================
+test_web_dev() {
+    print_header "TEST 1: Web Development Template"
+    
+    curl -X POST "$BASE_URL/grade_submission/" \
+      -F "submission_files=@web_dev/index.html" \
+      -F "submission_files=@web_dev/style.css" \
+      -F "submission_files=@web_dev/script.js" \
+      -F "criteria_json=@web_dev/criteria.json" \
+      -F "feedback_json=@web_dev/feedback.json" \
+      -F "template_preset=web dev" \
+      -F "student_name=John Doe" \
+      -F "student_credentials=test-token-123" \
+      -F "include_feedback=true" \
+      -F "feedback_type=default" \
+      | jq '.'
+}
+
+# ========================================
+# Test 2: API Testing Template
+# ========================================
+test_api() {
+    print_header "TEST 2: API Testing Template"
+    
+    curl -X POST "$BASE_URL/grade_submission/" \
+      -F "submission_files=@api_testing/server.js" \
+      -F "submission_files=@api_testing/package.json" \
+      -F "criteria_json=@api_testing/criteria.json" \
+      -F "feedback_json=@api_testing/feedback.json" \
+      -F "setup_json=@api_testing/setup.json" \
+      -F "template_preset=api" \
+      -F "student_name=Jane Smith" \
+      -F "student_credentials=test-token-456" \
+      -F "include_feedback=true" \
+      -F "feedback_type=default" \
+      | jq '.'
+}
+
+# ========================================
+# Test 3: Input/Output Template
+# ========================================
+test_io() {
+    print_header "TEST 3: Input/Output Template"
+    
+    curl -X POST "$BASE_URL/grade_submission/" \
+      -F "submission_files=@input_output/calculator.py" \
+      -F "submission_files=@input_output/requirements.txt" \
+      -F "criteria_json=@input_output/criteria.json" \
+      -F "feedback_json=@input_output/feedback.json" \
+      -F "setup_json=@input_output/setup.json" \
+      -F "template_preset=io" \
+      -F "student_name=Bob Johnson" \
+      -F "student_credentials=test-token-789" \
+      -F "include_feedback=true" \
+      -F "feedback_type=default" \
+      | jq '.'
+}
+
+# ========================================
+# Test 4: Essay Template
+# ========================================
+test_essay() {
+    print_header "TEST 4: Essay Template"
+    
+    curl -X POST "$BASE_URL/grade_submission/" \
+      -F "submission_files=@essay/essay.txt" \
+      -F "criteria_json=@essay/criteria.json" \
+      -F "feedback_json=@essay/feedback.json" \
+      -F "template_preset=essay" \
+      -F "student_name=Eve Adams" \
+      -F "student_credentials=test-token-202" \
+      -F "include_feedback=true" \
+      -F "feedback_type=default" \
+      | jq '.'
+}
+
+# ========================================
+# Test 5: Custom Template
+# ========================================
+test_custom() {
+    print_header "TEST 5: Custom Template"
+    
+    curl -X POST "$BASE_URL/grade_submission/" \
+      -F "submission_files=@custom_template/main.py" \
+      -F "criteria_json=@custom_template/criteria.json" \
+      -F "feedback_json=@custom_template/feedback.json" \
+      -F "custom_template=@custom_template/custom_template.py" \
+      -F "template_preset=custom" \
+      -F "student_name=Alice Williams" \
+      -F "student_credentials=test-token-101" \
+      -F "include_feedback=true" \
+      -F "feedback_type=default" \
+      | jq '.'
+}
+
+# ========================================
+# Template Info - Web Dev
+# ========================================
+test_template_info_web() {
+    print_header "TEMPLATE INFO: Web Dev"
+    
+    curl -X GET "$BASE_URL/templates/webdev" | jq '.'
+}
+
+# ========================================
+# Template Info - API
+# ========================================
+test_template_info_api() {
+    print_header "TEMPLATE INFO: API"
+    
+    curl -X GET "$BASE_URL/templates/api" | jq '.'
+}
+
+# ========================================
+# Template Info - I/O
+# ========================================
+test_template_info_io() {
+    print_header "TEMPLATE INFO: I/O"
+    
+    curl -X GET "$BASE_URL/templates/io" | jq '.'
+}
+
+# ========================================
+# Template Info - Essay
+# ========================================
+test_template_info_essay() {
+    print_header "TEMPLATE INFO: Essay"
+    
+    curl -X GET "$BASE_URL/templates/essay" | jq '.'
+}
+
+# ========================================
+# Main Menu
+# ========================================
+show_menu() {
+    echo -e "\n${GREEN}Autograder API Test Suite - cURL Edition${NC}"
+    echo "========================================"
+    echo "Base URL: $BASE_URL"
+    echo ""
+    echo "1. Test Web Development Template"
+    echo "2. Test API Testing Template"
+    echo "3. Test Input/Output Template"
+    echo "4. Test Essay Template"
+    echo "5. Test Custom Template"
+    echo "6. Get Template Info - Web Dev"
+    echo "7. Get Template Info - API"
+    echo "8. Get Template Info - I/O"
+    echo "9. Get Template Info - Essay"
+    echo "10. Run All Tests"
+    echo "11. Change Base URL"
+    echo "0. Exit"
+    echo ""
+}
+
+# Run all tests
+run_all() {
+    test_web_dev
+    test_api
+    test_io
+    test_essay
+    test_custom
+    test_template_info_web
+    test_template_info_api
+    test_template_info_io
+    test_template_info_essay
+}
+
+# Main loop
+if [ "$1" = "--all" ]; then
+    run_all
+elif [ "$1" = "--web" ]; then
+    test_web_dev
+elif [ "$1" = "--api" ]; then
+    test_api
+elif [ "$1" = "--io" ]; then
+    test_io
+elif [ "$1" = "--essay" ]; then
+    test_essay
+elif [ "$1" = "--custom" ]; then
+    test_custom
+elif [ "$1" = "--url" ] && [ -n "$2" ]; then
+    BASE_URL="$2"
+    echo "Base URL set to: $BASE_URL"
+    run_all
+else
+    # Interactive mode
+    while true; do
+        show_menu
+        read -p "Select an option (0-11): " choice
+        
+        case $choice in
+            1) test_web_dev ;;
+            2) test_api ;;
+            3) test_io ;;
+            4) test_essay ;;
+            5) test_custom ;;
+            6) test_template_info_web ;;
+            7) test_template_info_api ;;
+            8) test_template_info_io ;;
+            9) test_template_info_essay ;;
+            10) run_all ;;
+            11) 
+                read -p "Enter new base URL: " new_url
+                BASE_URL="$new_url"
+                echo -e "${GREEN}Base URL updated to: $BASE_URL${NC}"
+                ;;
+            0) 
+                echo -e "\n${GREEN}Goodbye!${NC}\n"
+                exit 0
+                ;;
+            *)
+                echo -e "${RED}Invalid option. Please select 0-11.${NC}"
+                ;;
+        esac
+        
+        echo ""
+        read -p "Press Enter to continue..."
+    done
+fi
diff --git a/tests/data/custom_template/criteria.json b/tests/data/custom_template/criteria.json
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/custom_template/custom_template.py b/tests/data/custom_template/custom_template.py
new file mode 100644
index 0000000..23ce232
--- /dev/null
+++ b/tests/data/custom_template/custom_template.py
@@ -0,0 +1,116 @@
+from autograder.builder.models.template import Template
+from autograder.builder.models.test_function import TestFunction
+from autograder.builder.models.param_description import ParamDescription
+from autograder.models.dataclass.test_result import TestResult
+from autograder.context import request_context
+
+
+class CheckFileExists(TestFunction):
+    """Test to check if a specific file exists in the submission."""
+    
+    @property
+    def name(self):
+        return "check_file_exists"
+    
+    @property
+    def description(self):
+        return "Checks if a specified file exists in the student submission."
+    
+    @property
+    def required_file(self):
+        return None
+    
+    @property
+    def parameter_description(self):
+        return [
+            ParamDescription("filename", "The name of the file to check for.", "string")
+        ]
+    
+    def execute(self, filename: str) -> TestResult:
+        request = request_context.get_request()
+        submission_files = request.submission_files
+
+        if filename in submission_files:
+            return TestResult(
+                self.name,
+                100,
+                f"File '{filename}' was found in the submission.",
+                parameters={"filename": filename}
+            )
+        else:
+            return TestResult(
+                self.name,
+                0,
+                f"File '{filename}' was NOT found in the submission.",
+                parameters={"filename": filename}
+            )
+
+
+class CheckFunctionExists(TestFunction):
+    """Test to check if a function is defined in a Python file."""
+    
+    @property
+    def name(self):
+        return "check_function_exists"
+    
+    @property
+    def description(self):
+        return "Checks if a function is defined in the main Python file."
+    
+    @property
+    def required_file(self):
+        return "PYTHON"
+    
+    @property
+    def parameter_description(self):
+        return [
+            ParamDescription("function_name", "The name of the function to check for.", "string")
+        ]
+    
+    def execute(self, python_content: str, function_name: str) -> TestResult:
+        if f"def {function_name}(" in python_content:
+            return TestResult(
+                self.name,
+                100,
+                f"Function '{function_name}()' was found in the code.",
+                parameters={"function_name": function_name}
+            )
+        else:
+            return TestResult(
+                self.name,
+                0,
+                f"Function '{function_name}()' was NOT found in the code.",
+                parameters={"function_name": function_name}
+            )
+
+
+class CustomTemplate(Template):
+    """A custom template for basic Python file checking."""
+    
+    @property
+    def template_name(self):
+        return "Custom Template"
+    
+    @property
+    def template_description(self):
+        return "A custom template for checking Python file structure."
+    
+    @property
+    def requires_pre_executed_tree(self) -> bool:
+        return False
+    
+    @property
+    def requires_execution_helper(self) -> bool:
+        return False
+    
+    def __init__(self, clean=False):
+        self.tests = {
+            "check_file_exists": CheckFileExists(),
+            "check_function_exists": CheckFunctionExists()
+        }
+    
+    def get_test(self, name: str) -> TestFunction:
+        test = self.tests.get(name)
+        if not test:
+            raise AttributeError(f"Test '{name}' not found in custom template.")
+        return test
diff --git a/tests/data/custom_template/feedback.json b/tests/data/custom_template/feedback.json
new file mode 100644
index 0000000..5629d00
--- /dev/null
+++ b/tests/data/custom_template/feedback.json
@@ -0,0 +1,12 @@
+{
+  "general": {
+    "report_title": "Custom Template Assignment Feedback",
+    "show_passed_tests": true,
+    "show_test_details": true
+  },
+  "default": {
+    "category_headers": {
+      "base": "Custom Template Requirements"
+    }
+  }
+}
diff --git a/tests/data/custom_template/main.py b/tests/data/custom_template/main.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/essay/criteria.json b/tests/data/essay/criteria.json
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/essay/essay.txt b/tests/data/essay/essay.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/essay/feedback.json b/tests/data/essay/feedback.json
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/input_output/calculator.py b/tests/data/input_output/calculator.py
new file mode 100644
index 0000000..6be2dbd
--- /dev/null
+++ b/tests/data/input_output/calculator.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""
+Simple Calculator Program
+Reads operation and two numbers from stdin and outputs the result.
+"""
+
+import sys
+
+
+def main():
+    try:
+        # Read inputs from stdin
+        operation = input().strip()
+        num1 = float(input().strip())
+        num2 = float(input().strip())
+        
+        # Perform calculation based on operation
+        if operation == "add":
+            result = num1 + num2
+        elif operation == "subtract":
+            result = num1 - num2
+        elif operation == "multiply":
+            result = num1 * num2
+        elif operation == "divide":
+            if num2 == 0:
+                print("Error: Division by zero")
+                return
+            result = num1 / num2
+        else:
+            print(f"Error: Unknown operation '{operation}'")
+            return
+        
+        # Print result (integer if whole number, otherwise float)
+        if result == int(result):
+            print(int(result))
+        else:
+            print(result)
+            
+    except ValueError:
+        print("Error: Invalid input")
+    except EOFError:
+        print("Error: Unexpected end of input")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/data/input_output/criteria.json b/tests/data/input_output/criteria.json
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/input_output/feedback.json b/tests/data/input_output/feedback.json
new file mode 100644
index 0000000..9088486
--- /dev/null
+++ b/tests/data/input_output/feedback.json
@@ -0,0 +1,14 @@
+{
+  "general": {
+    "report_title": "Calculator Assignment Feedback",
+    "show_passed_tests": true,
+    "show_test_details": true
+  },
+  "default": {
+    "category_headers": {
+      "base": "Calculator Requirements",
+      "basic_operations": "Basic Operations",
+      "edge_cases": "Edge Cases & Special Scenarios"
+    }
+  }
+}
diff --git a/tests/data/input_output/requirements.txt b/tests/data/input_output/requirements.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/input_output/setup.json b/tests/data/input_output/setup.json
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/web_dev/criteria.json b/tests/data/web_dev/criteria.json
new file mode 100644
index 0000000..ab144a7
--- /dev/null
+++ b/tests/data/web_dev/criteria.json
@@ -0,0 +1,35 @@
+{
+  "base": {
+    "weight": 100,
+    "subjects": {
+      "html_structure": {
+        "weight": 40,
+        "tests":
+        [
+          {
+            "name": "has_tag",
+            "file": "index.html",
+            "calls": [
+              ["div", 5],
+              ["h1", 2],
+              ["p", 3],
+              ["a", 2]
+            ]
+          }
+        ]
+      },
+      "css_styling": {
+        "weight": 30,
+        "tests": [
+          {
+            "name": "has_class",
+            "file": "index.html",
+            "calls": [
+              [["container", "row", "col-*"], 10]
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
diff --git a/tests/data/web_dev/feedback.json b/tests/data/web_dev/feedback.json
new file mode 100644
index 0000000..0e1806d
--- /dev/null
+++ b/tests/data/web_dev/feedback.json
@@ -0,0 +1,15 @@
+{
+  "general": {
+    "report_title": "Web Development Assignment Feedback",
+    "show_passed_tests": true,
+    "show_test_details": true
+  },
+  "default": {
+    "category_headers": {
+      "base": "Core Web Development Requirements",
+      "html_structure": "HTML Structure & Semantics",
+      "css_styling": "CSS Styling & Design",
+      "javascript_functionality": "JavaScript Functionality"
+    }
+  }
+}
diff --git a/tests/data/web_dev/index.html b/tests/data/web_dev/index.html
new file mode 100644
index 0000000..c5207ea
--- /dev/null
+++ b/tests/data/web_dev/index.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Student Portfolio</title>
+    <link rel="stylesheet" href="style.css">
+</head>
+<body>
+    <header class="container">
+        <h1>John Doe - Portfolio</h1>
+        <nav>
+            <a href="#about">About</a>
+            <a href="#projects">Projects</a>
+        </nav>
+    </header>
+
+    <div class="container row">
+        <div class="col-md-6">
+            <h1>Welcome</h1>
+            <p>This is my portfolio website showcasing my work.</p>
+            <p>I'm a passionate developer with experience in web technologies.</p>
+            <p>Check out my projects below!</p>
+        </div>
+        <div class="col-md-6">
+            <div class="card">
+                <h2>About Me</h2>
+                <p>I love coding and creating amazing web experiences.</p>
+            </div>
+        </div>
+    </div>
+
+    <footer class="container">
+        <p>&copy; 2024 John Doe</p>
+    </footer>
+
+    <script src="script.js"></script>
+</body>
+</html>
diff --git a/tests/data/web_dev/script.js b/tests/data/web_dev/script.js
new file mode 100644
index 0000000..68ea57c
--- /dev/null
+++ b/tests/data/web_dev/script.js
@@ -0,0 +1,33 @@
+// Interactive features for the portfolio
+
+document.addEventListener('DOMContentLoaded', function() {
+    console.log('Portfolio loaded successfully!');
+    
+    // Add smooth scrolling to navigation links
+    const navLinks = document.querySelectorAll('nav a');
+    
+    navLinks.forEach(link => {
+        link.addEventListener('click', function(e) {
+            e.preventDefault();
+            const targetId = this.getAttribute('href');
+            const targetElement = document.querySelector(targetId);
+            
+            if (targetElement) {
+                targetElement.scrollIntoView({ behavior: 'smooth' });
+            }
+        });
+    });
+    
+    // Add hover effect to cards
+    const cards = document.querySelectorAll('.card');
+    cards.forEach(card => {
+        card.addEventListener('mouseenter', function() {
+            this.style.transform = 'scale(1.05)';
+            this.style.transition = 'transform 0.3s ease';
+        });
+        
+        card.addEventListener('mouseleave', function() {
+            this.style.transform = 'scale(1)';
+        });
+    });
+});
diff --git a/tests/data/web_dev/style.css b/tests/data/web_dev/style.css
new file mode 100644
index 0000000..e69de29
diff --git a/tests/playroom.py b/tests/playroom.py
new file mode 100644
index 0000000..cf9e339
--- /dev/null
+++ b/tests/playroom.py
@@ -0,0 +1,46 @@
+"""
+DEPRECATED: This file has been moved to tests/playroom/ directory.
+
+The playroom functionality has been refactored into multiple template-specific
+playrooms for better organization and testing coverage.
+
+Please use one of the following:
+- tests/playroom/webdev_playroom.py - Web Development template
+- tests/playroom/api_playroom.py - API Testing template
+- tests/playroom/essay_playroom.py - Essay Grading template
+- tests/playroom/io_playroom.py - Input/Output template
+- tests/playroom/run_all_playrooms.py - Run all playrooms
+
+Usage:
+    python -m tests.playroom.webdev_playroom
+    python -m tests.playroom.run_all_playrooms
+    python -m tests.playroom.run_all_playrooms webdev
+
+See tests/playroom/README.md for full documentation.
+"""
+
+import sys
+import os
+
+# Add a deprecation warning
+print("\n" + "!" * 70)
+print("WARNING: This file is deprecated!")
+print("!" * 70)
+print("\nPlayrooms have been refactored into separate template-specific files.")
+print("Please use the new playroom directory structure:\n")
+print("  - tests/playroom/webdev_playroom.py")
+print("  - tests/playroom/api_playroom.py")
+print("  - tests/playroom/essay_playroom.py")
+print("  - tests/playroom/io_playroom.py")
+print("  - tests/playroom/run_all_playrooms.py")
+print("\nFor backward compatibility, running the webdev playroom...\n")
+print("!" * 70 + "\n")
+
+# For backward compatibility, run the webdev playroom
+try:
+    from tests.playroom.webdev_playroom import run_webdev_playroom
+    run_webdev_playroom()
+except ImportError:
+    print("ERROR: Could not import new playroom structure.")
+    print("Please run from project root: python -m tests.playroom.webdev_playroom")
+    sys.exit(1)
diff --git a/tests/playroom/README.md b/tests/playroom/README.md
new file mode 100644
index 0000000..8d0006e
--- /dev/null
+++ b/tests/playroom/README.md
@@ -0,0 +1,286 @@
+# Autograder Playrooms
+
+Welcome to the Autograder Playrooms! This directory contains comprehensive test environments for each grading template, allowing you to fully mock and test grading operations end-to-end.
+
+## Overview
+
+Each playroom provides a complete grading scenario including:
+- **Submission Files**: Realistic student code/content submissions
+- **Setup Configuration**: Docker/sandbox environment setup when needed
+- **Criteria Configuration**: Test functions and grading criteria
+- **Feedback Preferences**: Customized feedback settings
+- **Full Execution**: Complete autograder workflow from submission to final report
+
+## Available Playrooms
+
+### 1. Web Development (`webdev_playroom.py`)
+Tests HTML/CSS grading capabilities with Bootstrap integration.
+
+**Features:**
+- HTML file with Bootstrap framework
+- CSS class detection tests
+- Bootstrap component validation
+- Custom styling checks
+
+**Run:**
+```bash
+python -m tests.playroom.webdev_playroom
+```
+
+**Requirements:** None (no Docker needed)
+
+---
+
+### 2. API Testing (`api_playroom.py`)
+Tests REST API endpoint validation in a containerized environment.
+
+**Features:**
+- Flask API with multiple endpoints
+- Docker containerization
+- Health check testing
+- GET/POST endpoint validation
+- JSON response verification
+
+**Run:**
+```bash
+python -m tests.playroom.api_playroom
+```
+
+**Requirements:** Docker must be running
+
+---
+
+### 3. Essay Grading (`essay_playroom.py`)
+Tests AI-powered essay evaluation capabilities.
+
+**Features:**
+- Sample essay submission
+- AI-based criteria (clarity, grammar, argument strength)
+- Thesis statement evaluation
+- Adherence to prompt checking
+
+**Run:**
+```bash
+export OPENAI_API_KEY='your-key-here'
+python -m tests.playroom.essay_playroom
+```
+
+**Requirements:** OpenAI API key set in environment
+
+---
+
+### 4. Input/Output (`io_playroom.py`)
+Tests command-line program validation with stdin/stdout testing.
+
+**Features:**
+- Python calculator program
+- Multiple input/output test cases
+- Stdin input injection
+- Stdout output validation
+- Docker containerized execution
+
+**Run:**
+```bash
+python -m tests.playroom.io_playroom
+```
+
+**Requirements:** Docker must be running
+
+---
+
+## Running Playrooms
+
+### Run Individual Playroom
+```bash
+# Run a specific playroom
+python -m tests.playroom.webdev_playroom
+python -m tests.playroom.api_playroom
+python -m tests.playroom.essay_playroom
+python -m tests.playroom.io_playroom
+```
+
+### Run Multiple Playrooms
+```bash
+# Run all playrooms
+python -m tests.playroom.run_all_playrooms
+
+# Run specific playrooms
+python -m tests.playroom.run_all_playrooms webdev io
+
+# Run multiple playrooms
+python -m tests.playroom.run_all_playrooms api essay
+```
+
+### List Available Playrooms
+```bash
+python -m tests.playroom.run_all_playrooms --list
+```
+
+## Playroom Structure
+
+Each playroom follows a consistent structure:
+
+```python
+def create_submission():
+    """Create mock submission files"""
+    return {...}
+
+def create_setup_config():
+    """Create sandbox/Docker setup if needed"""
+    return {...}
+
+def create_criteria_config():
+    """Define grading criteria and test functions"""
+    return {...}
+
+def create_feedback_config():
+    """Configure feedback preferences"""
+    return {...}
+
+def run_[template]_playroom():
+    """Execute the complete grading workflow"""
+    # 1. Create submission files
+    # 2. Setup configuration
+    # 3. Build autograder request
+    # 4. Execute grading
+    # 5. Display results
+```
+
+## What Gets Tested
+
+### For Each Playroom:
+1. **File Loading**: Submission files are properly loaded
+2. **Template Selection**: Correct template is initialized
+3. **Criteria Building**: Criteria tree is constructed from config
+4. **Test Execution**: All test functions run successfully
+5. **Scoring**: Weighted scores are calculated correctly
+6. **Feedback Generation**: Feedback is generated based on preferences
+7. **Response Format**: Final response matches expected structure
+
+## Customizing Playrooms
+
+You can modify playrooms to test specific scenarios:
+
+### Change Submission Content
+```python
+def create_html_submission():
+    return """<html>Your custom HTML here</html>"""
+```
+
+### Modify Criteria Weights
+```python
+def create_criteria_config():
+    return {
+        "Test Name": {
+            "weight": 50,  # Adjust weight
+            "test": "test_function_name",
+            "parameters": {...}
+        }
+    }
+```
+
+### Adjust Feedback Settings
+```python
+def create_feedback_config():
+    return {
+        "tone": "encouraging",  # or "professional", "constructive"
+        "detail_level": "detailed",  # or "brief", "comprehensive"
+        "include_suggestions": True
+    }
+```
+
+## Common Issues
+
+### Docker Not Running
+**Symptoms:** API or I/O playrooms fail with connection errors
+
+**Solution:**
+```bash
+# Check Docker status
+docker ps
+
+# Start Docker if needed
+sudo systemctl start docker  # Linux
+# or open Docker Desktop on Mac/Windows
+```
+
+### Missing OpenAI API Key
+**Symptoms:** Essay playroom exits with warning
+
+**Solution:**
+```bash
+export OPENAI_API_KEY='sk-your-key-here'
+```
+
+### Module Import Errors
+**Symptoms:** Cannot import autograder modules
+
+**Solution:**
+```bash
+# Run from project root
+cd /path/to/autograder
+python -m tests.playroom.webdev_playroom
+```
+
+## Development Tips
+
+### Adding a New Playroom
+
+1. Create a new file: `tests/playroom/my_template_playroom.py`
+2. Follow the existing structure
+3. Add to `run_all_playrooms.py` PLAYROOMS dict:
+```python
+PLAYROOMS = {
+    "mytemplate": {
+        "name": "My Template",
+        "runner": run_mytemplate_playroom,
+        "description": "Description here"
+    }
+}
+```
+
+### Testing Changes
+
+Use playrooms to quickly test autograder changes:
+1. Make changes to autograder code
+2. Run relevant playroom
+3. Check output for expected behavior
+
+### Debugging
+
+Add debug logging to playrooms:
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Architecture
+
+```
+tests/playroom/
+├── __init__.py
+├── README.md                  # This file
+├── webdev_playroom.py        # Web development tests
+├── api_playroom.py           # API testing tests
+├── essay_playroom.py         # Essay grading tests
+├── io_playroom.py            # I/O testing tests
+└── run_all_playrooms.py      # Runner for all playrooms
+```
+
+## Contributing
+
+When adding new templates to the autograder:
+1. Create a corresponding playroom
+2. Include realistic submission examples
+3. Test all template features
+4. Document any special requirements
+5. Add to run_all_playrooms.py
+
+## License
+
+Same as parent project.
+
+## Questions?
+
+See main project documentation or contact the maintainers.
+
diff --git a/tests/playroom/__init__.py b/tests/playroom/__init__.py
new file mode 100644
index 0000000..017c4a4
--- /dev/null
+++ b/tests/playroom/__init__.py
@@ -0,0 +1,10 @@
+"""
+Playroom package for testing the Autograder system.
+
+Each playroom module simulates a complete grading workflow for a specific template:
+- webdev_playroom.py: Tests the web development template
+- essay_playroom.py: Tests the essay grading template
+- api_playroom.py: Tests the API testing template
+- io_playroom.py: Tests the input/output template
+"""
+
diff --git a/tests/playroom/api_playroom.py b/tests/playroom/api_playroom.py
new file mode 100644
index 0000000..695ff15
--- /dev/null
+++ b/tests/playroom/api_playroom.py
@@ -0,0 +1,250 @@
+"""
+API Testing Template Playroom
+
+This playroom demonstrates a complete grading operation for the API testing template.
+It includes:
+- Flask API submission files
+- Dockerfile for containerization
+- Setup configuration for sandbox execution
+- Criteria configuration with API test functions
+- Full mock grading execution
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from connectors.models.autograder_request import AutograderRequest
+from connectors.models.assignment_config import AssignmentConfig
+from autograder.autograder_facade import Autograder
+
+
+def create_api_submission():
+    """Create a sample Flask API submission."""
+    return """from flask import Flask, jsonify, request
+
+app = Flask(__name__)
+
+# In-memory data store
+users = [
+    {"id": 1, "name": "Alice", "email": "alice@example.com"},
+    {"id": 2, "name": "Bob", "email": "bob@example.com"}
+]
+
+@app.route('/health', methods=['GET'])
+def health_check():
+    return jsonify({"status": "healthy"}), 200
+
+@app.route('/api/users', methods=['GET'])
+def get_users():
+    return jsonify(users), 200
+
+@app.route('/api/users/<int:user_id>', methods=['GET'])
+def get_user(user_id):
+    user = next((u for u in users if u["id"] == user_id), None)
+    if user:
+        return jsonify(user), 200
+    return jsonify({"error": "User not found"}), 404
+
+@app.route('/api/users', methods=['POST'])
+def create_user():
+    data = request.get_json()
+    new_user = {
+        "id": len(users) + 1,
+        "name": data.get("name"),
+        "email": data.get("email")
+    }
+    users.append(new_user)
+    return jsonify(new_user), 201
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)
+"""
+
+
+def create_dockerfile():
+    """Create a Dockerfile for the API."""
+    return """FROM python:3.9-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app.py .
+
+EXPOSE 5000
+
+CMD ["python", "app.py"]
+"""
+
+
+def create_requirements_txt():
+    """Create requirements file for the API."""
+    return """Flask==2.3.0
+Werkzeug==2.3.0
+"""
+
+
+def create_setup_config():
+    """Create setup configuration for API testing."""
+    return {
+        "runtime_image": "python:3.9-slim",
+        "container_port": 5000,
+        "start_command": "python app.py",
+        "commands": {
+            "install_dependencies": "pip install Flask==2.3.0 Werkzeug==2.3.0"
+        }
+    }
+
+
+def create_criteria_config():
+    """Create criteria configuration for API grading."""
+    return {
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "API Endpoints": {
+                    "weight": 100,
+                    "subjects": {
+                        "Health Check": {
+                            "weight": 30,
+                            "tests": [
+                                {
+                                    "name": "health_check",
+                                    "calls": [
+                                        ["/health"]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Get All Users": {
+                            "weight": 35,
+                            "tests": [
+                                {
+                                    "name": "check_response_json",
+                                    "calls": [
+                                        ["/api/users", "0", {"id": 1}]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Get Single User": {
+                            "weight": 35,
+                            "tests": [
+                                {
+                                    "name": "check_response_json",
+                                    "calls": [
+                                        ["/api/users/1", "id", 1],
+                                        ["/api/users/1", "name", "Alice"]
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        "bonus": {
+            "weight": 20,
+            "subjects": {
+                "Advanced Features": {
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "check_response_json",
+                            "calls": [
+                                ["/api/users/2", "email", "bob@example.com"]
+                            ]
+                        }
+                    ]
+                }
+            }
+        },
+        "penalty": {
+            "weight": 10
+        }
+    }
+
+
+def create_feedback_config():
+    """Create feedback preferences for the grading."""
+    return {
+        "general": {
+            "report_title": "Relatório de Avaliação - API REST",
+            "show_score": True,
+            "show_passed_tests": False,
+            "add_report_summary": True
+        },
+        "ai": {
+            "provide_solutions": "hint",
+            "feedback_tone": "professional",
+            "feedback_persona": "Senior Backend Developer",
+            "assignment_context": "Este é um teste de API REST com Flask."
+        },
+        "default": {
+            "category_headers": {
+                "base": "✅ Requisitos Essenciais",
+                "bonus": "⭐ Pontos Extras",
+                "penalty": "❌ Pontos a Melhorar"
+            }
+        }
+    }
+
+
+def run_api_playroom():
+    """Execute the API testing playroom."""
+    print("\n" + "="*70)
+    print("API TESTING TEMPLATE PLAYROOM")
+    print("="*70 + "\n")
+
+    # Create submission files
+    print("📄 Creating API submission files...")
+    submission_files = {
+        "app.py": create_api_submission()
+    }
+
+    # Create assignment configuration
+    print("⚙️  Setting up assignment configuration...")
+    assignment_config = AssignmentConfig(
+        template="api",
+        criteria=create_criteria_config(),
+        feedback=create_feedback_config(),
+        setup=create_setup_config()
+    )
+
+    # Create autograder request
+    print("📋 Building autograder request...")
+    request = AutograderRequest(
+        submission_files=submission_files,
+        assignment_config=assignment_config,
+        student_name="Jane Smith",
+        include_feedback=True,
+        feedback_mode="default"
+    )
+
+    # Execute grading
+    print("🚀 Starting grading process...")
+    print("⚠️  Note: This requires Docker to be running and may take a few minutes")
+    print("-"*70)
+    result = Autograder.grade(request)
+    print("-"*70)
+
+    # Display results
+    print("\n" + "="*70)
+    print("GRADING RESULTS")
+    print("="*70)
+    print(f"\n✅ Status: {result.status}")
+    print(f"📊 Final Score: {result.final_score}/100")
+    print(f"\n📝 Feedback:\n{result.feedback}")
+    print(f"\n📈 Test Report:\n{result.test_report}")
+    print("\n" + "="*70 + "\n")
+
+
+if __name__ == "__main__":
+    run_api_playroom()
+
diff --git a/tests/playroom/essay_playroom.py b/tests/playroom/essay_playroom.py
new file mode 100644
index 0000000..d1647d1
--- /dev/null
+++ b/tests/playroom/essay_playroom.py
@@ -0,0 +1,287 @@
+"""
+Essay Grading Template Playroom
+
+This playroom demonstrates a complete grading operation for the essay grading template.
+It includes:
+- Essay submission file
+- AI-based criteria configuration
+- Feedback preferences
+- Full mock grading execution with OpenAI integration
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from connectors.models.autograder_request import AutograderRequest
+from connectors.models.assignment_config import AssignmentConfig
+from autograder.autograder_facade import Autograder
+
+
+def create_essay_submission():
+    """Create a sample essay submission."""
+    return """The Impact of Artificial Intelligence on Modern Education
+
+Introduction
+
+Artificial intelligence (AI) has emerged as a transformative force in numerous sectors, 
+and education is no exception. This essay explores how AI is reshaping the educational 
+landscape, examining both its benefits and challenges. The integration of AI technologies 
+in classrooms represents a fundamental shift in how we approach teaching and learning.
+
+The Promise of Personalized Learning
+
+One of the most significant advantages of AI in education is its ability to provide 
+personalized learning experiences. Traditional classroom settings often struggle to 
+accommodate the diverse learning paces and styles of individual students. AI-powered 
+adaptive learning systems can analyze student performance in real-time and adjust the 
+difficulty and presentation of material accordingly. This ensures that each student 
+receives instruction tailored to their specific needs, maximizing engagement and 
+comprehension.
+
+Moreover, AI tutoring systems can provide immediate feedback, something that would be 
+impossible for a single human instructor managing a large class. These systems can 
+identify when a student is struggling with a particular concept and offer additional 
+resources or alternative explanations. This level of individualized attention can 
+significantly improve learning outcomes.
+
+Administrative Efficiency and Teacher Support
+
+Beyond direct student interaction, AI is proving valuable in reducing the administrative 
+burden on educators. Automated grading systems can handle routine assessments, freeing 
+teachers to focus on more complex pedagogical tasks. AI can also assist in curriculum 
+planning, identifying gaps in course content and suggesting improvements based on 
+student performance data.
+
+However, it is crucial to note that AI should augment, not replace, human teachers. 
+The emotional intelligence, creativity, and nuanced understanding that experienced 
+educators bring to the classroom remain irreplaceable.
+
+Challenges and Ethical Considerations
+
+Despite its potential, the integration of AI in education raises important concerns. 
+Data privacy is paramount, as these systems collect vast amounts of information about 
+students' learning patterns and behaviors. There are also valid concerns about 
+algorithmic bias, where AI systems might inadvertently perpetuate existing inequalities 
+if trained on biased data.
+
+Additionally, there is the question of accessibility. Not all educational institutions 
+have the resources to implement sophisticated AI systems, potentially widening the gap 
+between well-funded and under-resourced schools.
+
+Conclusion
+
+Artificial intelligence holds tremendous promise for transforming education, offering 
+personalized learning experiences and supporting teachers in their work. However, its 
+implementation must be thoughtful and equitable, addressing concerns about privacy, 
+bias, and accessibility. As we move forward, the goal should be to harness AI's 
+capabilities while preserving the irreplaceable human elements of education. The future 
+of education likely lies not in AI replacing teachers, but in a collaborative model 
+where technology and human expertise work together to create the best possible learning 
+environment for all students.
+"""
+
+
+def create_criteria_config():
+    """Create criteria configuration for essay grading."""
+    return {
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "Writing Quality": {
+                    "weight": 40,
+                    "subjects": {
+                        "Clarity and Cohesion": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "clarity_and_cohesion"
+                                }
+                            ]
+                        },
+                        "Grammar and Spelling": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "grammar_and_spelling"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "Content": {
+                    "weight": 60,
+                    "subjects": {
+                        "Thesis Statement": {
+                            "weight": 30,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "thesis_statement"
+                                }
+                            ]
+                        },
+                        "Argument Strength": {
+                            "weight": 40,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "argument_strength"
+                                }
+                            ]
+                        },
+                        "Adherence to Prompt": {
+                            "weight": 30,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "adherence_to_prompt",
+                                    "calls": [
+                                        ["Discuss the impact of artificial intelligence on modern education, including both benefits and challenges"]
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        "bonus": {
+            "weight": 20,
+            "subjects": {
+                "Advanced Elements": {
+                    "weight": 100,
+                    "subjects": {
+                        "Counterargument Handling": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "counterargument_handling"
+                                }
+                            ]
+                        },
+                        "Evidence Quality": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "essay.txt",
+                                    "name": "evidence_quality"
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        "penalty": {
+            "weight": 10,
+            "subjects": {
+                "Issues": {
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "file": "essay.txt",
+                            "name": "logical_fallacy_check"
+                        }
+                    ]
+                }
+            }
+        }
+    }
+
+
+def create_feedback_config():
+    """Create feedback preferences for the grading."""
+    return {
+        "general": {
+            "report_title": "Relatório de Avaliação - Redação sobre IA na Educação",
+            "show_score": True,
+            "show_passed_tests": False,
+            "add_report_summary": True
+        },
+        "ai": {
+            "provide_solutions": "detailed",
+            "feedback_tone": "constructive and encouraging",
+            "feedback_persona": "Essay Writing Coach",
+            "assignment_context": "Este é um ensaio argumentativo sobre o impacto da IA na educação moderna.",
+            "extra_orientations": "Forneça sugestões específicas para melhorar a estrutura dos argumentos e a qualidade das evidências."
+        },
+        "default": {
+            "category_headers": {
+                "base": "✅ Requisitos Essenciais",
+                "bonus": "⭐ Elementos Avançados",
+                "penalty": "❌ Problemas Identificados"
+            }
+        }
+    }
+
+
+def run_essay_playroom():
+    """Execute the essay grading playroom."""
+    print("\n" + "="*70)
+    print("ESSAY GRADING TEMPLATE PLAYROOM")
+    print("="*70 + "\n")
+
+    # Check for OpenAI API key
+    openai_key = os.environ.get("OPENAI_API_KEY")
+    if not openai_key:
+        print("⚠️  WARNING: OPENAI_API_KEY not found in environment variables")
+        print("   Essay grading requires OpenAI API access")
+        print("   Please set OPENAI_API_KEY environment variable to run this playroom")
+        print("\n   Example: export OPENAI_API_KEY='your-key-here'\n")
+        return
+
+    # Create submission files
+    print("📄 Creating essay submission...")
+    submission_files = {
+        "essay.txt": create_essay_submission()
+    }
+
+    # Create assignment configuration
+    print("⚙️  Setting up assignment configuration...")
+    assignment_config = AssignmentConfig(
+        template="essay",
+        criteria=create_criteria_config(),
+        feedback=create_feedback_config(),
+        setup={}
+    )
+
+    # Create autograder request
+    print("📋 Building autograder request...")
+    request = AutograderRequest(
+        submission_files=submission_files,
+        assignment_config=assignment_config,
+        student_name="Alex Johnson",
+        include_feedback=True,
+        feedback_mode="ai",
+        openai_key=openai_key
+    )
+
+    # Execute grading
+    print("🚀 Starting grading process...")
+    print("⚠️  Note: This will make API calls to OpenAI and may take a minute")
+    print("-"*70)
+    result = Autograder.grade(request)
+    print("-"*70)
+
+    # Display results
+    print("\n" + "="*70)
+    print("GRADING RESULTS")
+    print("="*70)
+    print(f"\n✅ Status: {result.status}")
+    print(f"📊 Final Score: {result.final_score}/100")
+    print(f"\n📝 Feedback:\n{result.feedback}")
+    print(f"\n📈 Test Report:\n{result.test_report}")
+    print("\n" + "="*70 + "\n")
+
+
+if __name__ == "__main__":
+    run_essay_playroom()
+
diff --git a/tests/playroom/io_playroom.py b/tests/playroom/io_playroom.py
new file mode 100644
index 0000000..0645d80
--- /dev/null
+++ b/tests/playroom/io_playroom.py
@@ -0,0 +1,226 @@
+"""
+Input/Output Template Playroom
+
+This playroom demonstrates a complete grading operation for the input/output template.
+It includes:
+- Python program submission that accepts stdin input
+- Dockerfile for containerized execution
+- Setup configuration for sandbox execution
+- Criteria configuration with I/O test functions
+- Full mock grading execution
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from connectors.models.autograder_request import AutograderRequest
+from connectors.models.assignment_config import AssignmentConfig
+from autograder.autograder_facade import Autograder
+
+
+def create_calculator_submission():
+    """Create a sample Python calculator program that accepts input."""
+    return """#!/usr/bin/env python3
+# Simple Calculator Program
+
+def main():
+    print("Simple Calculator")
+    print("Enter first number:")
+    num1 = float(input())
+    
+    print("Enter operation (+, -, *, /):")
+    operation = input().strip()
+    
+    print("Enter second number:")
+    num2 = float(input())
+    
+    if operation == '+':
+        result = num1 + num2
+    elif operation == '-':
+        result = num1 - num2
+    elif operation == '*':
+        result = num1 * num2
+    elif operation == '/':
+        if num2 != 0:
+            result = num1 / num2
+        else:
+            print("Error: Division by zero")
+            return
+    else:
+        print("Error: Invalid operation")
+        return
+    
+    print(f"Result: {result}")
+
+if __name__ == "__main__":
+    main()
+"""
+
+
+def create_setup_config():
+    """Create setup configuration for I/O testing."""
+    return {
+        "runtime_image": "python:3.9-slim",
+        "container_port": None,  # No port mapping needed for I/O testing
+        "execution_timeout": 10,
+        "start_command": "python3 calculator.py"
+    }
+
+
+def create_criteria_config():
+    """Create criteria configuration for I/O grading."""
+    return {
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "Basic Operations": {
+                    "weight": 100,
+                    "subjects": {
+                        "Addition": {
+                            "weight": 25,
+                            "tests": [
+                                {
+                                    "name": "expect_output",
+                                    "calls": [
+                                        [["10", "+", "5"], "Result: 15.0"]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Subtraction": {
+                            "weight": 25,
+                            "tests": [
+                                {
+                                    "name": "expect_output",
+                                    "calls": [
+                                        [["20", "-", "8"], "Result: 12.0"]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Multiplication": {
+                            "weight": 25,
+                            "tests": [
+                                {
+                                    "name": "expect_output",
+                                    "calls": [
+                                        [["6", "*", "7"], "Result: 42.0"]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Division": {
+                            "weight": 25,
+                            "tests": [
+                                {
+                                    "name": "expect_output",
+                                    "calls": [
+                                        [["100", "/", "4"], "Result: 25.0"]
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        "bonus": {
+            "weight": 20,
+            "subjects": {
+                "Error Handling": {
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "expect_output",
+                            "calls": [
+                                [["10", "/", "0"], "Error: Division by zero"]
+                            ]
+                        }
+                    ]
+                }
+            }
+        }
+    }
+
+
+def create_feedback_config():
+    """Create feedback preferences for the grading."""
+    return {
+        "general": {
+            "report_title": "Relatório de Avaliação - Calculadora",
+            "show_score": True,
+            "show_passed_tests": False,
+            "add_report_summary": True
+        },
+        "ai": {
+            "provide_solutions": "hint",
+            "feedback_tone": "encouraging but direct",
+            "feedback_persona": "Code Buddy",
+            "assignment_context": "Este é um teste de programa interativo com entrada/saída."
+        },
+        "default": {
+            "category_headers": {
+                "base": "✅ Requisitos Essenciais",
+                "bonus": "⭐ Pontos Extras"
+            }
+        }
+    }
+
+
+def run_io_playroom():
+    """Execute the input/output playroom."""
+    print("\n" + "="*70)
+    print("INPUT/OUTPUT TEMPLATE PLAYROOM")
+    print("="*70 + "\n")
+
+    # Create submission files
+    print("📄 Creating Python calculator submission...")
+    submission_files = {
+        "calculator.py": create_calculator_submission()
+    }
+
+    # Create assignment configuration
+    print("⚙️  Setting up assignment configuration...")
+    assignment_config = AssignmentConfig(
+        template="io",
+        criteria=create_criteria_config(),
+        feedback=create_feedback_config(),
+        setup=create_setup_config()
+    )
+
+    # Create autograder request
+    print("📋 Building autograder request...")
+    request = AutograderRequest(
+        submission_files=submission_files,
+        assignment_config=assignment_config,
+        student_name="Sam Wilson",
+        include_feedback=True,
+        feedback_mode="default"
+    )
+
+    # Execute grading
+    print("🚀 Starting grading process...")
+    print("⚠️  Note: This requires Docker to be running")
+    print("-"*70)
+    result = Autograder.grade(request)
+    print("-"*70)
+
+    # Display results
+    print("\n" + "="*70)
+    print("GRADING RESULTS")
+    print("="*70)
+    print(f"\n✅ Status: {result.status}")
+    print(f"📊 Final Score: {result.final_score}/100")
+    print(f"\n📝 Feedback:\n{result.feedback}")
+    print(f"\n📈 Test Report:\n{result.test_report}")
+    print("\n" + "="*70 + "\n")
+
+
+if __name__ == "__main__":
+    run_io_playroom()
+
diff --git a/tests/playroom/run_all_playrooms.py b/tests/playroom/run_all_playrooms.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/playroom/webdev_playroom.py b/tests/playroom/webdev_playroom.py
new file mode 100644
index 0000000..8eac1e8
--- /dev/null
+++ b/tests/playroom/webdev_playroom.py
@@ -0,0 +1,241 @@
+"""
+Web Development Template Playroom
+
+This playroom demonstrates a complete grading operation for the web development template.
+It includes:
+- HTML submission files with Bootstrap and CSS classes
+- Criteria configuration with multiple test functions
+- Feedback preferences
+- Full mock grading execution
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from connectors.models.autograder_request import AutograderRequest
+from connectors.models.assignment_config import AssignmentConfig
+from autograder.autograder_facade import Autograder
+
+
+def create_html_submission():
+    """Create a sample HTML submission with Bootstrap and CSS classes."""
+    return """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Student Portfolio</title>
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
+    <style>
+        .custom-header { background-color: #f8f9fa; }
+        .custom-card { border-radius: 10px; }
+    </style>
+</head>
+<body>
+    <header class="custom-header p-4">
+        <h1 class="display-4">Welcome to My Portfolio</h1>
+        <p class="lead">A showcase of my work</p>
+    </header>
+    
+    <div class="container mt-5">
+        <div class="row">
+            <div class="col-md-4">
+                <div class="card custom-card mb-4">
+                    <div class="card-body">
+                        <h5 class="card-title">Project 1</h5>
+                        <p class="card-text">Description of project 1</p>
+                    </div>
+                </div>
+            </div>
+            <div class="col-md-4">
+                <div class="card custom-card mb-4">
+                    <div class="card-body">
+                        <h5 class="card-title">Project 2</h5>
+                        <p class="card-text">Description of project 2</p>
+                    </div>
+                </div>
+            </div>
+            <div class="col-md-4">
+                <div class="card custom-card mb-4">
+                    <div class="card-body">
+                        <h5 class="card-title">Project 3</h5>
+                        <p class="card-text">Description of project 3</p>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+    
+    <footer class="mt-5 p-4 bg-dark text-white text-center">
+        <p>&copy; 2024 Student Portfolio</p>
+    </footer>
+    
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
+</body>
+</html>"""
+
+
+def create_criteria_config():
+    """Create criteria configuration for web development grading."""
+    return {
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "HTML Structure": {
+                    "weight": 50,
+                    "subjects": {
+                        "Bootstrap Integration": {
+                            "weight": 40,
+                            "tests": [
+                                {
+                                    "file": "index.html",
+                                    "name": "check_bootstrap_linked"
+                                }
+                            ]
+                        },
+                        "Bootstrap Grid Classes": {
+                            "weight": 60,
+                            "tests": [
+                                {
+                                    "file": "index.html",
+                                    "name": "has_class",
+                                    "calls": [
+                                        [["col-*"], 3]
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                },
+                "Components": {
+                    "weight": 50,
+                    "subjects": {
+                        "Card Components": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "index.html",
+                                    "name": "has_class",
+                                    "calls": [
+                                        [["card", "card-body"], 6]
+                                    ]
+                                }
+                            ]
+                        },
+                        "Custom Styling": {
+                            "weight": 50,
+                            "tests": [
+                                {
+                                    "file": "index.html",
+                                    "name": "has_class",
+                                    "calls": [
+                                        [["custom-*"], 2]
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        "bonus": {
+            "weight": 20,
+            "subjects": {
+                "Best Practices": {
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "file": "index.html",
+                            "name": "check_no_inline_styles"
+                        }
+                    ]
+                }
+            }
+        },
+        "penalty": {
+            "weight": 10
+        }
+    }
+
+
+def create_feedback_config():
+    """Create feedback preferences for the grading."""
+    return {
+        "general": {
+            "report_title": "Relatório de Avaliação - Portfolio Web",
+            "show_score": True,
+            "show_passed_tests": False,
+            "add_report_summary": True
+        },
+        "ai": {
+            "provide_solutions": "hint",
+            "feedback_tone": "encouraging",
+            "feedback_persona": "Web Development Mentor",
+            "assignment_context": "Este é um projeto de portfolio web usando Bootstrap e HTML/CSS."
+        },
+        "default": {
+            "category_headers": {
+                "base": "✅ Requisitos Essenciais",
+                "bonus": "⭐ Pontos Extras",
+                "penalty": "❌ Pontos a Melhorar"
+            }
+        }
+    }
+
+
+def run_webdev_playroom():
+    """Execute the web development playroom."""
+    print("\n" + "="*70)
+    print("WEB DEVELOPMENT TEMPLATE PLAYROOM")
+    print("="*70 + "\n")
+
+    # Create submission files
+    print("📄 Creating HTML submission...")
+    submission_files = {
+        "index.html": create_html_submission()
+    }
+
+    # Create assignment configuration
+    print("⚙️  Setting up assignment configuration...")
+    assignment_config = AssignmentConfig(
+        template="webdev",
+        criteria=create_criteria_config(),
+        feedback=create_feedback_config(),
+        setup={}
+    )
+
+    # Create autograder request
+    print("📋 Building autograder request...")
+    request = AutograderRequest(
+        submission_files=submission_files,
+        assignment_config=assignment_config,
+        student_name="John Doe",
+        include_feedback=True,
+        feedback_mode="default"
+    )
+
+    # Execute grading
+    print("🚀 Starting grading process...\n")
+    print("-"*70)
+    result = Autograder.grade(request)
+    print("-"*70)
+
+    # Display results
+    print("\n" + "="*70)
+    print("GRADING RESULTS")
+    print("="*70)
+    print(f"\n✅ Status: {result.status}")
+    print(f"📊 Final Score: {result.final_score}/100")
+    print(f"\n📝 Feedback:\n{result.feedback}")
+    print(f"\n📈 Test Report:\n{result.test_report}")
+    print("\n" + "="*70 + "\n")
+
+
+if __name__ == "__main__":
+    run_webdev_playroom()
+
diff --git a/tests/test_pipeline_modes.py b/tests/test_pipeline_modes.py
new file mode 100644
index 0000000..df88317
--- /dev/null
+++ b/tests/test_pipeline_modes.py
@@ -0,0 +1,252 @@
+"""
+Test the pipeline's ability to handle single vs multi-submission modes.
+
+This test verifies:
+1. Single submission mode: Grades directly from config (one-pass)
+2. Multi-submission mode: Builds tree once, grades multiple times
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from autograder.autograder import build_pipeline
+from autograder.models.dataclass.criteria_config import CriteriaConfig
+
+
+def create_simple_criteria():
+    """Create simple test criteria."""
+    return {
+        "base": {
+            "weight": 90,
+            "subjects": [
+                {
+                    "subject_name": "Basic Tests",
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "always_pass",
+                            "parameters": {}
+                        },
+                        {
+                            "name": "check_value",
+                            "parameters": {
+                                "expected": 42
+                            }
+                        }
+                    ]
+                }
+            ]
+        },
+        "bonus": {
+            "weight": 10,
+            "tests": [
+                {
+                    "name": "always_pass",
+                    "parameters": {}
+                }
+            ]
+        }
+    }
+
+
+def create_mock_submission():
+    """Create mock submission files."""
+    return {
+        "main.py": "value = 42\n"
+    }
+
+
+def test_single_submission_mode():
+    """Test single submission mode (grade directly from config)."""
+    print("\n" + "="*80)
+    print("TEST: Single Submission Mode (Direct from Config)")
+    print("="*80)
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Build pipeline for single submission
+    pipeline = build_pipeline(
+        template_name="input_output",
+        include_feedback=False,
+        grading_criteria=criteria,
+        feedback_config=None,
+        setup_config=None,
+        custom_template=None,
+        feedback_mode=None,
+        submission_files=submission,
+        submission_id="test_001",
+        is_multi_submission=False  # Single submission mode
+    )
+
+    # Verify pipeline steps
+    print("\nPipeline Steps:")
+    for i, step in enumerate(pipeline._steps):
+        print(f"  {i+1}. {step.__class__.__name__}")
+
+    print("\nExpected flow:")
+    print("  - TemplateLoaderStep loads the template")
+    print("  - GradeStep grades directly from config (one-pass)")
+    print("  - ExporterStep exports results")
+
+    # Verify GradeStep has criteria_json for single submission mode
+    grade_step = None
+    for step in pipeline._steps:
+        if step.__class__.__name__ == "GradeStep":
+            grade_step = step
+            break
+
+    assert grade_step is not None, "GradeStep not found in pipeline"
+    assert grade_step._criteria_json is not None, "GradeStep should have criteria_json in single mode"
+    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
+
+    print("\n✓ Single submission mode configured correctly")
+    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
+    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
+
+
+def test_multi_submission_mode():
+    """Test multi-submission mode (build tree, then grade)."""
+    print("\n" + "="*80)
+    print("TEST: Multi-Submission Mode (Tree Building)")
+    print("="*80)
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Build pipeline for multiple submissions
+    pipeline = build_pipeline(
+        template_name="input_output",
+        include_feedback=False,
+        grading_criteria=criteria,
+        feedback_config=None,
+        setup_config=None,
+        custom_template=None,
+        feedback_mode=None,
+        submission_files=submission,
+        submission_id="test_002",
+        is_multi_submission=True  # Multi-submission mode
+    )
+
+    # Verify pipeline steps
+    print("\nPipeline Steps:")
+    for i, step in enumerate(pipeline._steps):
+        print(f"  {i+1}. {step.__class__.__name__}")
+
+    print("\nExpected flow:")
+    print("  - TemplateLoaderStep loads the template")
+    print("  - BuildTreeStep builds criteria tree (reusable)")
+    print("  - GradeStep grades from tree")
+    print("  - ExporterStep exports results")
+
+    # Verify BuildTreeStep and GradeStep are present
+    has_build_tree = False
+    grade_step = None
+
+    for step in pipeline._steps:
+        if step.__class__.__name__ == "BuildTreeStep":
+            has_build_tree = True
+        elif step.__class__.__name__ == "GradeStep":
+            grade_step = step
+
+    assert has_build_tree, "BuildTreeStep not found in pipeline for multi-submission mode"
+    assert grade_step is not None, "GradeStep not found in pipeline"
+    assert grade_step._criteria_json is None, "GradeStep should NOT have criteria_json in multi mode"
+    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
+
+    print("\n✓ Multi-submission mode configured correctly")
+    print(f"  - BuildTreeStep present: {has_build_tree}")
+    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
+    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
+
+
+def test_grade_step_input_detection():
+    """Test that GradeStep correctly detects input type."""
+    print("\n" + "="*80)
+    print("TEST: GradeStep Input Type Detection")
+    print("="*80)
+
+    from autograder.steps.grade_step import GradeStep
+    from autograder.models.abstract.template import Template
+    from autograder.models.criteria_tree import CriteriaTree, CategoryNode
+
+    criteria = create_simple_criteria()
+    submission = create_mock_submission()
+
+    # Test 1: GradeStep with Template input (single mode)
+    print("\n1. Testing with Template input (single submission mode):")
+    grade_step_single = GradeStep(
+        criteria_json=criteria,
+        submission_files=submission,
+        submission_id="test_single"
+    )
+
+    # Create a mock template
+    class MockTemplate(Template):
+        def __init__(self):
+            self.name = "mock_template"
+            self.tests = {}
+
+        def get_test(self, test_name):
+            # Return a mock test function
+            def mock_test(*args, **kwargs):
+                return {"passed": True, "score": 100}
+            return mock_test
+
+    mock_template = MockTemplate()
+
+    print("  - Input type: Template")
+    print("  - Expected behavior: Grade from config (one-pass)")
+    print("  ✓ GradeStep will use grade_from_config method")
+
+    # Test 2: GradeStep with CriteriaTree input (multi mode)
+    print("\n2. Testing with CriteriaTree input (multi-submission mode):")
+    grade_step_multi = GradeStep(
+        submission_files=submission,
+        submission_id="test_multi"
+    )
+
+    # Create a mock criteria tree
+    mock_tree = CriteriaTree(
+        base=CategoryNode(name="base", weight=100),
+        bonus=None,
+        penalty=None
+    )
+
+    print("  - Input type: CriteriaTree")
+    print("  - Expected behavior: Grade from tree (reusable)")
+    print("  ✓ GradeStep will use grade_from_tree method")
+
+
+if __name__ == "__main__":
+    print("\n" + "="*80)
+    print("PIPELINE MODE TESTS")
+    print("="*80)
+
+    try:
+        test_single_submission_mode()
+        test_multi_submission_mode()
+        test_grade_step_input_detection()
+
+        print("\n" + "="*80)
+        print("ALL TESTS PASSED ✓")
+        print("="*80)
+        print("\nSummary:")
+        print("  ✓ Single submission mode: Grades directly from config")
+        print("  ✓ Multi-submission mode: Builds tree once, grades multiple times")
+        print("  ✓ GradeStep correctly detects input type (Template vs CriteriaTree)")
+        print("  ✓ Pipeline configuration is flexible and optimized")
+
+    except AssertionError as e:
+        print(f"\n❌ TEST FAILED: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
new file mode 100644
index 0000000..8cd5692
--- /dev/null
+++ b/tests/unit/test_pipeline_steps.py
@@ -0,0 +1,328 @@
+"""
+Unit tests for BuildTreeStep and GradeStep.
+
+These tests verify:
+1. BuildTreeStep correctly builds a CriteriaTree from config
+2. GradeStep intelligently handles both CriteriaTree and Template inputs
+3. Single vs multi-submission pipeline modes work correctly
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from autograder.steps.build_tree_step import BuildTreeStep
+from autograder.steps.grade_step import GradeStep
+from autograder.models.config.criteria import CriteriaConfig
+from autograder.models.dataclass.step_result import StepStatus
+from autograder.models.abstract.template import Template
+from autograder.models.abstract.test_function import TestFunction
+from autograder.models.dataclass.test_result import TestResult
+
+
+# Mock Template and TestFunction for testing
+class MockTestFunction(TestFunction):
+    """Mock test function that always passes."""
+
+    def __init__(self, test_name):
+        self._test_name = test_name
+
+    @property
+    def name(self):
+        return self._test_name
+
+    @property
+    def description(self):
+        return f"Mock test function: {self._test_name}"
+
+    def execute(self, *args, **kwargs):
+        """Always return a passing result."""
+        return TestResult(
+            test_name=self._test_name,
+            passed=True,
+            score=100.0,
+            max_score=100.0,
+            message="Test passed (mock)",
+        )
+
+
+class MockTemplate(Template):
+    """Mock template with pre-defined test functions."""
+
+    def __init__(self):
+        self.name = "mock_template"
+        self._tests = {
+            "expect_output": MockTestFunction("expect_output"),
+            "check_file": MockTestFunction("check_file"),
+            "validate_input": MockTestFunction("validate_input"),
+        }
+
+    @property
+    def template_name(self):
+        """Get template name."""
+        return "mock_template"
+
+    @property
+    def template_description(self):
+        """Get template description."""
+        return "Mock template for testing purposes"
+
+    @property
+    def requires_pre_executed_tree(self) -> bool:
+        """Mock templates don't require pre-executed trees."""
+        return False
+
+    @property
+    def requires_execution_helper(self) -> bool:
+        """Mock templates don't require execution helpers."""
+        return False
+
+    @property
+    def execution_helper(self):
+        """No execution helper needed for mocks."""
+        return None
+
+    def stop(self):
+        """No cleanup needed for mock templates."""
+        pass
+
+    def get_test(self, test_name: str):
+        """Get a test function by name."""
+        return self._tests.get(test_name)
+
+    def get_available_tests(self):
+        """Get list of available test names."""
+        return list(self._tests.keys())
+
+
+def create_simple_criteria():
+    """Create a simple criteria configuration for testing."""
+    return {
+        "test_library": "input_output",
+        "base": {
+            "weight": 100,
+            "subjects": [
+                {
+                    "subject_name": "Basic Tests",
+                    "weight": 100,
+                    "tests": [
+                        {
+                            "name": "expect_output",
+                            "file": "main.py",
+                            "parameters": [
+                                {"name": "stdin_input", "value": ["hello"]},
+                                {"name": "expected_output", "value": "hello"},
+                            ],
+                        },
+                        {
+                            "name": "expect_output",
+                            "file": "main.py",
+                            "parameters": [
+                                {"name": "stdin_input", "value": ["world"]},
+                                {"name": "expected_output", "value": "world"},
+                            ],
+                        },
+                    ],
+                }
+            ],
+        },
+        "bonus": {
+            "weight": 10,
+            "tests": [
+                {
+                    "name": "expect_output",
+                    "file": "main.py",
+                    "parameters": [
+                        {"name": "stdin_input", "value": ["bonus"]},
+                        {"name": "expected_output", "value": "bonus"},
+                    ],
+                }
+            ],
+        },
+    }
+
+
+def create_mock_submission():
+    """Create mock submission files."""
+    return {"main.py": "# Simple echo program\nprint(input())"}
+
+
+def test_build_tree_step():
+    """Test that BuildTreeStep correctly builds a CriteriaTree."""
+    print("\n" + "=" * 80)
+    print("TEST: BuildTreeStep")
+    print("=" * 80)
+
+    # Create criteria and template
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+
+    # Create and execute step
+    build_step = BuildTreeStep(criteria)
+    result = build_step.execute(template)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Build step failed: {result.error}"
+    assert result.data is not None, "CriteriaTree is None"
+
+    criteria_tree = result.data
+
+    # Verify tree structure
+    assert criteria_tree.base is not None, "Base category missing"
+    assert criteria_tree.bonus is not None, "Bonus category missing"
+
+    print("✓ BuildTreeStep successfully built CriteriaTree")
+    print(f"  - Base category: {criteria_tree.base.name}")
+    print(f"  - Bonus category: {criteria_tree.bonus.name}")
+
+    # Print tree structure
+    print("\nCriteria Tree Structure:")
+    criteria_tree.print_tree()
+
+    return criteria_tree
+
+
+def test_grade_from_tree():
+    """Test that GradeStep can grade from a CriteriaTree."""
+    print("\n" + "=" * 80)
+    print("TEST: GradeStep with CriteriaTree (Multi-Submission Mode)")
+    print("=" * 80)
+
+    # Build criteria tree first
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+    build_step = BuildTreeStep(criteria)
+    build_result = build_step.execute(template)
+
+    criteria_tree = build_result.data
+    submission_files = create_mock_submission()
+
+    # Create and execute grade step
+    grade_step = GradeStep(
+        submission_files=submission_files, submission_id="test_submission_1"
+    )
+
+    result = grade_step.execute(criteria_tree)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
+    assert result.data is not None, "GradingResult is None"
+
+    grading_result = result.data
+
+    print("✓ GradeStep successfully graded from CriteriaTree")
+    print(f"  - Final Score: {grading_result.final_score}")
+    print(f"  - Status: {grading_result.status}")
+
+    # Print result tree
+    if grading_result.result_tree:
+        print("\nResult Tree:")
+        grading_result.result_tree.print_tree()
+
+    return grading_result
+
+
+def test_grade_from_config():
+    """Test that GradeStep can grade directly from config (single submission mode)."""
+    print("\n" + "=" * 80)
+    print("TEST: GradeStep with Template (Single Submission Mode)")
+    print("=" * 80)
+
+    # Create criteria and template
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+    submission_files = create_mock_submission()
+
+    # Create and execute grade step (without building tree first)
+    grade_step = GradeStep(
+        criteria_json=criteria,
+        submission_files=submission_files,
+        submission_id="test_submission_2",
+    )
+
+    result = grade_step.execute(template)
+
+    # Verify result
+    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
+    assert result.data is not None, "GradingResult is None"
+
+    grading_result = result.data
+
+    print("✓ GradeStep successfully graded from config")
+    print(f"  - Final Score: {grading_result.final_score}")
+    print(f"  - Status: {grading_result.status}")
+
+    # Print result tree
+    if grading_result.result_tree:
+        print("\nResult Tree:")
+        grading_result.result_tree.print_tree()
+
+    return grading_result
+
+
+def test_invalid_input_type():
+    """Test that GradeStep rejects invalid input types."""
+    print("\n" + "=" * 80)
+    print("TEST: GradeStep with Invalid Input Type")
+    print("=" * 80)
+
+    submission_files = create_mock_submission()
+
+    grade_step = GradeStep(
+        submission_files=submission_files, submission_id="test_submission_3"
+    )
+
+    # Try to execute with invalid input (string)
+    result = grade_step.execute("invalid input")
+
+    # Verify it fails gracefully
+    assert result.status == StepStatus.FAIL, "Should fail with invalid input"
+    assert result.error is not None, "Should have error message"
+
+    print("✓ GradeStep correctly rejected invalid input")
+    print(f"  - Error: {result.error}")
+
+
+def run_all_tests():
+    """Run all unit tests."""
+    print("\n" + "#" * 80)
+    print("# RUNNING PIPELINE STEPS UNIT TESTS")
+    print("#" * 80)
+
+    try:
+        # Test 1: Build tree
+        criteria_tree = test_build_tree_step()
+
+        # Test 2: Grade from tree (multi-submission mode)
+        grading_result_tree = test_grade_from_tree()
+
+        # Test 3: Grade from config (single submission mode)
+        grading_result_config = test_grade_from_config()
+
+        # Test 4: Invalid input handling
+        test_invalid_input_type()
+
+        print("\n" + "#" * 80)
+        print("# ALL TESTS PASSED! ✓")
+        print("#" * 80)
+
+    except AssertionError as e:
+        print("\n" + "#" * 80)
+        print(f"# TEST FAILED: {e}")
+        print("#" * 80)
+        raise
+    except Exception as e:
+        print("\n" + "#" * 80)
+        print(f"# UNEXPECTED ERROR: {e}")
+        print("#" * 80)
+        import traceback
+
+        traceback.print_exc()
+        raise
+
+
+if __name__ == "__main__":
+    run_all_tests()

From c08ad35ebda86973fe8ea88f62931b3307580a67 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Tue, 20 Jan 2026 07:46:56 -0300
Subject: [PATCH 37/49] refactor: adding placeholders for better debugging

---
 autograder/models/dataclass/submission.py     |   3 +-
 autograder/pipeline.py                        |   3 +-
 .../services/template_library_service.py      |   8 +-
 tests/test_pipeline_modes.py                  | 252 ------------------
 4 files changed, 8 insertions(+), 258 deletions(-)
 delete mode 100644 tests/test_pipeline_modes.py

diff --git a/autograder/models/dataclass/submission.py b/autograder/models/dataclass/submission.py
index 44d8cc3..98ecd87 100644
--- a/autograder/models/dataclass/submission.py
+++ b/autograder/models/dataclass/submission.py
@@ -1,11 +1,12 @@
 from typing import List
 
-from autograder.models import dataclass
+from dataclasses import dataclass
 
 @dataclass
 class SubmissionFile:
     filename: str
     content: str
+
 @dataclass
 class Submission:
     username: str
diff --git a/autograder/pipeline.py b/autograder/pipeline.py
index 114f45b..0753031 100644
--- a/autograder/pipeline.py
+++ b/autograder/pipeline.py
@@ -12,8 +12,9 @@ def add_step(self, step: Step) -> None:
 
     def run(self, input_data:'Submission'):
         result = StepResult(data=input_data, status=StepStatus.SUCCESS, original_input=input_data) #Initialize result object with input data
-
+        print(result)
         for step in self._steps:
+            print("Executing step:", step.__class__.__name__)
             if not result.is_successful:
                 break
             try:
diff --git a/autograder/services/template_library_service.py b/autograder/services/template_library_service.py
index 8bb191f..756da05 100644
--- a/autograder/services/template_library_service.py
+++ b/autograder/services/template_library_service.py
@@ -1,8 +1,8 @@
 from autograder.models.abstract.template import Template
 from autograder.template_library.web_dev import WebDevTemplate
-from autograder.template_library.api_testing import ApiTestingTemplate
-from autograder.template_library.input_output import InputOutputTemplate
-from autograder.template_library.essay_grader import EssayGraderTemplate
+#from autograder.template_library.api_testing import ApiTestingTemplate
+#from autograder.template_library.input_output import InputOutputTemplate
+#from autograder.template_library.essay_grader import EssayGraderTemplate
 
 class TemplateLibraryService:
     def __init__(self):
@@ -12,7 +12,7 @@ def start_template(self, template_name: str) -> Template:
         """Initialize and return the template class based on the template name.
            If template requires sandboxing, it creates a sandboxed instance.
         """
-        pass
+        return WebDevTemplate() #That's a placeholder
 
     def get_template_info(self, template_name: str) -> dict:
         """Return metadata about the template."""
diff --git a/tests/test_pipeline_modes.py b/tests/test_pipeline_modes.py
deleted file mode 100644
index df88317..0000000
--- a/tests/test_pipeline_modes.py
+++ /dev/null
@@ -1,252 +0,0 @@
-"""
-Test the pipeline's ability to handle single vs multi-submission modes.
-
-This test verifies:
-1. Single submission mode: Grades directly from config (one-pass)
-2. Multi-submission mode: Builds tree once, grades multiple times
-"""
-
-import sys
-from pathlib import Path
-
-# Add project root to path
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from autograder.autograder import build_pipeline
-from autograder.models.dataclass.criteria_config import CriteriaConfig
-
-
-def create_simple_criteria():
-    """Create simple test criteria."""
-    return {
-        "base": {
-            "weight": 90,
-            "subjects": [
-                {
-                    "subject_name": "Basic Tests",
-                    "weight": 100,
-                    "tests": [
-                        {
-                            "name": "always_pass",
-                            "parameters": {}
-                        },
-                        {
-                            "name": "check_value",
-                            "parameters": {
-                                "expected": 42
-                            }
-                        }
-                    ]
-                }
-            ]
-        },
-        "bonus": {
-            "weight": 10,
-            "tests": [
-                {
-                    "name": "always_pass",
-                    "parameters": {}
-                }
-            ]
-        }
-    }
-
-
-def create_mock_submission():
-    """Create mock submission files."""
-    return {
-        "main.py": "value = 42\n"
-    }
-
-
-def test_single_submission_mode():
-    """Test single submission mode (grade directly from config)."""
-    print("\n" + "="*80)
-    print("TEST: Single Submission Mode (Direct from Config)")
-    print("="*80)
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Build pipeline for single submission
-    pipeline = build_pipeline(
-        template_name="input_output",
-        include_feedback=False,
-        grading_criteria=criteria,
-        feedback_config=None,
-        setup_config=None,
-        custom_template=None,
-        feedback_mode=None,
-        submission_files=submission,
-        submission_id="test_001",
-        is_multi_submission=False  # Single submission mode
-    )
-
-    # Verify pipeline steps
-    print("\nPipeline Steps:")
-    for i, step in enumerate(pipeline._steps):
-        print(f"  {i+1}. {step.__class__.__name__}")
-
-    print("\nExpected flow:")
-    print("  - TemplateLoaderStep loads the template")
-    print("  - GradeStep grades directly from config (one-pass)")
-    print("  - ExporterStep exports results")
-
-    # Verify GradeStep has criteria_json for single submission mode
-    grade_step = None
-    for step in pipeline._steps:
-        if step.__class__.__name__ == "GradeStep":
-            grade_step = step
-            break
-
-    assert grade_step is not None, "GradeStep not found in pipeline"
-    assert grade_step._criteria_json is not None, "GradeStep should have criteria_json in single mode"
-    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
-
-    print("\n✓ Single submission mode configured correctly")
-    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
-    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
-
-
-def test_multi_submission_mode():
-    """Test multi-submission mode (build tree, then grade)."""
-    print("\n" + "="*80)
-    print("TEST: Multi-Submission Mode (Tree Building)")
-    print("="*80)
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Build pipeline for multiple submissions
-    pipeline = build_pipeline(
-        template_name="input_output",
-        include_feedback=False,
-        grading_criteria=criteria,
-        feedback_config=None,
-        setup_config=None,
-        custom_template=None,
-        feedback_mode=None,
-        submission_files=submission,
-        submission_id="test_002",
-        is_multi_submission=True  # Multi-submission mode
-    )
-
-    # Verify pipeline steps
-    print("\nPipeline Steps:")
-    for i, step in enumerate(pipeline._steps):
-        print(f"  {i+1}. {step.__class__.__name__}")
-
-    print("\nExpected flow:")
-    print("  - TemplateLoaderStep loads the template")
-    print("  - BuildTreeStep builds criteria tree (reusable)")
-    print("  - GradeStep grades from tree")
-    print("  - ExporterStep exports results")
-
-    # Verify BuildTreeStep and GradeStep are present
-    has_build_tree = False
-    grade_step = None
-
-    for step in pipeline._steps:
-        if step.__class__.__name__ == "BuildTreeStep":
-            has_build_tree = True
-        elif step.__class__.__name__ == "GradeStep":
-            grade_step = step
-
-    assert has_build_tree, "BuildTreeStep not found in pipeline for multi-submission mode"
-    assert grade_step is not None, "GradeStep not found in pipeline"
-    assert grade_step._criteria_json is None, "GradeStep should NOT have criteria_json in multi mode"
-    assert grade_step._submission_files is not None, "GradeStep should have submission_files"
-
-    print("\n✓ Multi-submission mode configured correctly")
-    print(f"  - BuildTreeStep present: {has_build_tree}")
-    print(f"  - GradeStep has criteria_json: {grade_step._criteria_json is not None}")
-    print(f"  - GradeStep has submission_files: {grade_step._submission_files is not None}")
-
-
-def test_grade_step_input_detection():
-    """Test that GradeStep correctly detects input type."""
-    print("\n" + "="*80)
-    print("TEST: GradeStep Input Type Detection")
-    print("="*80)
-
-    from autograder.steps.grade_step import GradeStep
-    from autograder.models.abstract.template import Template
-    from autograder.models.criteria_tree import CriteriaTree, CategoryNode
-
-    criteria = create_simple_criteria()
-    submission = create_mock_submission()
-
-    # Test 1: GradeStep with Template input (single mode)
-    print("\n1. Testing with Template input (single submission mode):")
-    grade_step_single = GradeStep(
-        criteria_json=criteria,
-        submission_files=submission,
-        submission_id="test_single"
-    )
-
-    # Create a mock template
-    class MockTemplate(Template):
-        def __init__(self):
-            self.name = "mock_template"
-            self.tests = {}
-
-        def get_test(self, test_name):
-            # Return a mock test function
-            def mock_test(*args, **kwargs):
-                return {"passed": True, "score": 100}
-            return mock_test
-
-    mock_template = MockTemplate()
-
-    print("  - Input type: Template")
-    print("  - Expected behavior: Grade from config (one-pass)")
-    print("  ✓ GradeStep will use grade_from_config method")
-
-    # Test 2: GradeStep with CriteriaTree input (multi mode)
-    print("\n2. Testing with CriteriaTree input (multi-submission mode):")
-    grade_step_multi = GradeStep(
-        submission_files=submission,
-        submission_id="test_multi"
-    )
-
-    # Create a mock criteria tree
-    mock_tree = CriteriaTree(
-        base=CategoryNode(name="base", weight=100),
-        bonus=None,
-        penalty=None
-    )
-
-    print("  - Input type: CriteriaTree")
-    print("  - Expected behavior: Grade from tree (reusable)")
-    print("  ✓ GradeStep will use grade_from_tree method")
-
-
-if __name__ == "__main__":
-    print("\n" + "="*80)
-    print("PIPELINE MODE TESTS")
-    print("="*80)
-
-    try:
-        test_single_submission_mode()
-        test_multi_submission_mode()
-        test_grade_step_input_detection()
-
-        print("\n" + "="*80)
-        print("ALL TESTS PASSED ✓")
-        print("="*80)
-        print("\nSummary:")
-        print("  ✓ Single submission mode: Grades directly from config")
-        print("  ✓ Multi-submission mode: Builds tree once, grades multiple times")
-        print("  ✓ GradeStep correctly detects input type (Template vs CriteriaTree)")
-        print("  ✓ Pipeline configuration is flexible and optimized")
-
-    except AssertionError as e:
-        print(f"\n❌ TEST FAILED: {e}")
-        sys.exit(1)
-    except Exception as e:
-        print(f"\n❌ ERROR: {e}")
-        import traceback
-        traceback.print_exc()
-        sys.exit(1)
-

From 7a19be05606b2b4181f65e2a58abd2c7d3e0a7ca Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Tue, 20 Jan 2026 07:47:08 -0300
Subject: [PATCH 38/49] feat: add HTML grading pipeline test script

---
 pipeline_modes.py | 175 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 pipeline_modes.py

diff --git a/pipeline_modes.py b/pipeline_modes.py
new file mode 100644
index 0000000..67f7851
--- /dev/null
+++ b/pipeline_modes.py
@@ -0,0 +1,175 @@
+"""
+Simple test script for grading an HTML assignment using the autograder pipeline.
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from autograder.autograder import build_pipeline
+from autograder.models.dataclass.submission import Submission, SubmissionFile
+
+
+def create_mock_html_submission():
+    """Create a mock HTML submission for testing."""
+    html_content = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Student Portfolio</title>
+    <link rel="stylesheet" href="style.css">
+</head>
+<body>
+    <header class="container">
+        <h1>John Doe - Portfolio</h1>
+        <nav>
+            <a href="#about">About</a>
+            <a href="#projects">Projects</a>
+        </nav>
+    </header>
+
+    <div class="container row">
+        <div class="col-md-6">
+            <h1>Welcome</h1>
+            <p>This is my portfolio website showcasing my work.</p>
+            <p>I'm a passionate developer with experience in web technologies.</p>
+            <p>Check out my projects below!</p>
+        </div>
+        <div class="col-md-6">
+            <div class="card">
+                <h2>About Me</h2>
+                <p>I love coding and creating amazing web experiences.</p>
+            </div>
+        </div>
+    </div>
+
+    <footer class="container">
+        <p>&copy; 2024 John Doe</p>
+    </footer>
+
+    <script src="script.js"></script>
+</body>
+</html>"""
+
+    submission_file = SubmissionFile(
+        filename="index.html",
+        content=html_content
+    )
+
+    submission = Submission(
+        username="student123",
+        user_id=12345,
+        assignment_id=1,
+        submission_files=[submission_file]
+    )
+
+    return submission
+
+
+def create_mock_grading_criteria():
+    """Create mock grading criteria for HTML assignment."""
+    return {
+        "base": {
+            "weight": 100,
+            "subjects": {
+                "html_structure": {
+                    "weight": 40,
+                    "tests": [
+                        {
+                            "name": "has_tag",
+                            "file": "index.html",
+                            "calls": [
+                                ["div", 5],
+                                ["h1", 2],
+                                ["p", 3],
+                                ["a", 2]
+                            ]
+                        }
+                    ]
+                },
+                "css_styling": {
+                    "weight": 30,
+                    "tests": [
+                        {
+                            "name": "has_class",
+                            "file": "index.html",
+                            "calls": [
+                                [["container", "row", "col-*"], 10]
+                            ]
+                        }
+                    ]
+                }
+            }
+        }
+    }
+
+
+def create_mock_feedback_config():
+    """Create mock feedback configuration."""
+    return {
+        "general": {
+            "report_title": "Web Development Assignment Feedback",
+            "show_score": True,
+            "show_passed_tests": False,
+            "add_report_summary": True
+        },
+        "default": {
+            "category_headers": {
+                "base": "Core Web Development Requirements",
+                "html_structure": "HTML Structure & Semantics",
+                "css_styling": "CSS Styling & Design"
+            }
+        }
+    }
+
+
+def html_grading_pipeline():
+    """Test the autograder pipeline with HTML assignment."""
+    print("\n" + "="*70)
+    print("HTML ASSIGNMENT GRADING TEST")
+    print("="*70 + "\n")
+
+    # Create mock data
+    print("📄 Creating mock HTML submission...")
+    submission = create_mock_html_submission()
+
+    print("⚙️  Creating grading criteria...")
+    grading_criteria = create_mock_grading_criteria()
+
+    print("📋 Creating feedback configuration...")
+    feedback_config = create_mock_feedback_config()
+
+    # Build the pipeline
+    print("🔧 Building autograder pipeline...")
+    pipeline = build_pipeline(
+        template_name="webdev",
+        include_feedback=False,  # Set to True to include feedback generation
+        grading_criteria=grading_criteria,
+        feedback_config=feedback_config,
+        setup_config=None,
+        custom_template=None,
+        feedback_mode=None,
+        submission_files={sf.filename: sf.content for sf in submission.submission_files}
+    )
+
+    print("✅ Pipeline built successfully!\n")
+    print("Pipeline steps:")
+    for i, step in enumerate(pipeline._steps, 1):
+        print(f"  {i}. {step.__class__.__name__}")
+
+    print("\n" + "="*70)
+    print("Pipeline is ready. You can now implement the rest!")
+    print("="*70 + "\n")
+
+    return pipeline
+
+
+if __name__ == "__main__":
+    pipeline = html_grading_pipeline()
+
+    pipeline.run(create_mock_html_submission())
+

From 8f9375c464e6a3ce23e30e151ed1cce2bc99607f Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Tue, 20 Jan 2026 20:19:05 -0300
Subject: [PATCH 39/49] refactor: remove criteria_config.py

---
 .../models/dataclass/criteria_config.py       | 102 ------------------
 1 file changed, 102 deletions(-)
 delete mode 100644 autograder/models/dataclass/criteria_config.py

diff --git a/autograder/models/dataclass/criteria_config.py b/autograder/models/dataclass/criteria_config.py
deleted file mode 100644
index 08d553a..0000000
--- a/autograder/models/dataclass/criteria_config.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""
-Pydantic models for validating criteria configuration JSON structure.
-
-New schema structure:
-- Subjects are arrays with 'subject_name' field
-- Parameters are named objects: [{"name": "param", "value": "val"}, ...]
-- Tests contain parameters directly (no 'calls' array)
-- Root config has optional 'test_library' field
-"""
-from pydantic import BaseModel, Field, field_validator
-from typing import List, Dict, Any, Optional, Union
-
-
-class ParameterConfig(BaseModel):
-    """Named parameter for a test function."""
-    name: str = Field(..., description="Parameter name")
-    value: Any = Field(..., description="Parameter value")
-
-    model_config = {"extra": "forbid"}
-
-
-class TestConfig(BaseModel):
-    """Configuration for a single test execution."""
-    name: str = Field(..., description="Name of the test function in the template")
-    file: Optional[str] = Field(None, description="Target file for the test (if applicable)")
-    parameters: Optional[List[ParameterConfig]] = Field(
-        default_factory=list,
-        description="Named parameters for the test function"
-    )
-
-    model_config = {"extra": "forbid"}
-
-    def get_args_list(self) -> List[Any]:
-        """Convert named parameters to positional arguments list."""
-        if not self.parameters:
-            return []
-        return [param.value for param in self.parameters]
-
-    def get_kwargs_dict(self) -> Dict[str, Any]:
-        """Convert named parameters to keyword arguments dictionary."""
-        if not self.parameters:
-            return {}
-        return {param.name: param.value for param in self.parameters}
-
-
-class SubjectConfig(BaseModel):
-    """Configuration for a subject node (can contain tests or nested subjects)."""
-    subject_name: str = Field(..., description="Name of the subject")
-    weight: float = Field(..., ge=0, le=100, description="Weight of this subject (0-100)")
-    tests: Optional[List[TestConfig]] = Field(None, description="Tests under this subject")
-    subjects: Optional[List['SubjectConfig']] = Field(None, description="Nested subjects")
-
-    model_config = {"extra": "forbid"}
-
-    def model_post_init(self, __context):
-        """Validate that subject has either tests or subjects, but not both or neither."""
-        has_tests = self.tests is not None and len(self.tests) > 0
-        has_subjects = self.subjects is not None and len(self.subjects) > 0
-
-        if has_tests and has_subjects:
-            raise ValueError(f"Subject '{self.subject_name}' cannot have both 'tests' and 'subjects'. Choose one.")
-        if not has_tests and not has_subjects:
-            raise ValueError(f"Subject '{self.subject_name}' must have either 'tests' or 'subjects'.")
-
-
-class CategoryConfig(BaseModel):
-    """Configuration for a category (base, bonus, or penalty)."""
-    weight: float = Field(..., ge=0, le=100, description="Weight of this category (0-100)")
-    subjects: Optional[List[SubjectConfig]] = Field(None, description="Subjects under this category (array)")
-    tests: Optional[List[TestConfig]] = Field(None, description="Tests directly under category")
-
-    model_config = {"extra": "forbid"}
-
-    def model_post_init(self, __context):
-        """Validate that category has either tests or subjects."""
-        has_tests = self.tests is not None and len(self.tests) > 0
-        has_subjects = self.subjects is not None and len(self.subjects) > 0
-
-        if has_tests and has_subjects:
-            raise ValueError("Category cannot have both 'tests' and 'subjects'. Choose one.")
-        if not has_tests and not has_subjects:
-            raise ValueError("Category must have either 'tests' or 'subjects'.")
-
-
-class CriteriaConfig(BaseModel):
-    """Root configuration for grading criteria."""
-    test_library: Optional[str] = Field(None, description="Name of the test library/template to use")
-    base: CategoryConfig = Field(..., description="Base grading criteria (required)")
-    bonus: Optional[CategoryConfig] = Field(None, description="Bonus points criteria")
-    penalty: Optional[CategoryConfig] = Field(None, description="Penalty criteria")
-
-    model_config = {"extra": "forbid"}
-
-    @classmethod
-    def from_dict(cls, data: dict) -> 'CriteriaConfig':
-        """Create and validate criteria config from dictionary."""
-        return cls.model_validate(data)
-
-    @classmethod
-    def from_json(cls, json_str: str) -> 'CriteriaConfig':
-        """Create and validate criteria config from JSON string."""
-        return cls.model_validate_json(json_str)

From d310456b6f55e9662d85487ef4f6dd0185969299 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Fri, 23 Jan 2026 22:17:11 -0300
Subject: [PATCH 40/49] refactor: update configuration model imports and field
 names for clarity

---
 autograder/models/config/category.py         |   3 +-
 autograder/models/config/subject.py          |   2 +-
 autograder/services/parsers/criteria_tree.py |   2 +-
 autograder/steps/build_tree_step.py          |   2 +-
 tests/unit/test_config_models.py             | 518 +++++++++++++++++++
 5 files changed, 522 insertions(+), 5 deletions(-)
 create mode 100644 tests/unit/test_config_models.py

diff --git a/autograder/models/config/category.py b/autograder/models/config/category.py
index e553e53..3c25f73 100644
--- a/autograder/models/config/category.py
+++ b/autograder/models/config/category.py
@@ -6,9 +6,8 @@
 
 
 class CategoryConfig(BaseModel):
-    name: str = Field(..., description="Name of the subject")
     weight: float = Field(
-        ..., ge=0, le=100, description="Weight of this subject (0-100)"
+        ..., ge=0, le=100, description="Weight of this category (0-100)"
     )
     tests: Optional[List[TestConfig]] = Field(
         None, description="Tests under this subject"
diff --git a/autograder/models/config/subject.py b/autograder/models/config/subject.py
index 5338323..6910137 100644
--- a/autograder/models/config/subject.py
+++ b/autograder/models/config/subject.py
@@ -4,7 +4,7 @@
 
 
 class SubjectConfig(BaseModel):
-    name: str = Field(..., description="Name of the subject")
+    subject_name: str = Field(..., description="Name of the subject")
     weight: float = Field(
         ..., ge=0, le=100, description="Weight of this subject (0-100)"
     )
diff --git a/autograder/services/parsers/criteria_tree.py b/autograder/services/parsers/criteria_tree.py
index db359ee..dcf590c 100644
--- a/autograder/services/parsers/criteria_tree.py
+++ b/autograder/services/parsers/criteria_tree.py
@@ -24,7 +24,7 @@ def __parse_subjects(self, configs: List[SubjectConfig]) -> List[SubjectNode]:
         return subjects
 
     def __parse_subject(self, config: SubjectConfig) -> SubjectNode:
-        subject = SubjectNode(config.name, config.weight)
+        subject = SubjectNode(config.subject_name, config.weight)
 
         subject.subjects_weight = config.subjects_weight
 
diff --git a/autograder/steps/build_tree_step.py b/autograder/steps/build_tree_step.py
index 6aea989..100aa17 100644
--- a/autograder/steps/build_tree_step.py
+++ b/autograder/steps/build_tree_step.py
@@ -2,7 +2,7 @@
 from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.abstract.step import Step
 from autograder.models.abstract.template import Template
-from autograder.models.dataclass.criteria_config import CriteriaConfig
+from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.dataclass.step_result import StepResult, StepStatus
 
 
diff --git a/tests/unit/test_config_models.py b/tests/unit/test_config_models.py
new file mode 100644
index 0000000..ce63a39
--- /dev/null
+++ b/tests/unit/test_config_models.py
@@ -0,0 +1,518 @@
+"""Test suite for configuration models to ensure they align with criteria_schema.json"""
+import json
+import pytest
+from pathlib import Path
+
+from autograder.models.config.criteria import CriteriaConfig
+from autograder.models.config.category import CategoryConfig
+from autograder.models.config.subject import SubjectConfig
+from autograder.models.config.test import TestConfig, ParameterConfig
+
+
+@pytest.fixture
+def criteria_schema_path():
+    """Path to the criteria_schema.json file"""
+    return Path(__file__).parent.parent.parent / "critera_schema.json"
+
+
+@pytest.fixture
+def criteria_schema_dict(criteria_schema_path):
+    """Load criteria schema as dictionary"""
+    with open(criteria_schema_path, 'r') as f:
+        return json.load(f)
+
+
+@pytest.fixture
+def criteria_config(criteria_schema_dict):
+    """Parse criteria schema into CriteriaConfig object"""
+    return CriteriaConfig.from_dict(criteria_schema_dict)
+
+
+class TestParameterConfig:
+    """Test ParameterConfig model"""
+
+    def test_parameter_config_creation(self):
+        """Test creating a ParameterConfig"""
+        param = ParameterConfig(name="tag", value="body")
+        assert param.name == "tag"
+        assert param.value == "body"
+
+    def test_parameter_config_with_int_value(self):
+        """Test ParameterConfig with integer value"""
+        param = ParameterConfig(name="required_count", value=1)
+        assert param.name == "required_count"
+        assert param.value == 1
+        assert isinstance(param.value, int)
+
+    def test_parameter_config_forbid_extra(self):
+        """Test that extra fields are forbidden"""
+        with pytest.raises(ValueError):
+            ParameterConfig(name="tag", value="body", extra_field="not allowed")
+
+
+class TestTestConfig:
+    """Test TestConfig model"""
+
+    def test_test_config_with_file_and_name(self):
+        """Test creating a TestConfig with file and name"""
+        test = TestConfig(
+            file="index.html",
+            name="has_tag",
+            parameters=[
+                ParameterConfig(name="tag", value="body"),
+                ParameterConfig(name="required_count", value=1)
+            ]
+        )
+        assert test.file == "index.html"
+        assert test.name == "has_tag"
+        assert len(test.parameters) == 2
+
+    def test_test_config_without_parameters(self):
+        """Test TestConfig without parameters"""
+        test = TestConfig(file="index.html", name="check_css_linked")
+        assert test.file == "index.html"
+        assert test.name == "check_css_linked"
+        assert test.parameters is None
+
+    def test_get_args_list(self):
+        """Test converting parameters to args list"""
+        test = TestConfig(
+            file="index.html",
+            name="has_tag",
+            parameters=[
+                ParameterConfig(name="tag", value="body"),
+                ParameterConfig(name="required_count", value=1)
+            ]
+        )
+        args = test.get_args_list()
+        assert args == ["body", 1]
+
+    def test_get_kwargs_dict(self):
+        """Test converting parameters to kwargs dict"""
+        test = TestConfig(
+            file="index.html",
+            name="has_tag",
+            parameters=[
+                ParameterConfig(name="tag", value="body"),
+                ParameterConfig(name="required_count", value=1)
+            ]
+        )
+        kwargs = test.get_kwargs_dict()
+        assert kwargs == {"tag": "body", "required_count": 1}
+
+    def test_get_args_list_empty(self):
+        """Test get_args_list with no parameters"""
+        test = TestConfig(file="index.html", name="check_css_linked")
+        args = test.get_args_list()
+        assert args == []
+
+    def test_get_kwargs_dict_empty(self):
+        """Test get_kwargs_dict with no parameters"""
+        test = TestConfig(file="index.html", name="check_css_linked")
+        kwargs = test.get_kwargs_dict()
+        assert kwargs == {}
+
+
+class TestSubjectConfig:
+    """Test SubjectConfig model"""
+
+    def test_subject_config_with_tests(self):
+        """Test SubjectConfig with tests only"""
+        subject = SubjectConfig(
+            subject_name="structure",
+            weight=40,
+            tests=[
+                TestConfig(file="index.html", name="has_tag", parameters=[
+                    ParameterConfig(name="tag", value="body")
+                ])
+            ]
+        )
+        assert subject.subject_name == "structure"
+        assert subject.weight == 40
+        assert len(subject.tests) == 1
+        assert subject.subjects is None
+
+    def test_subject_config_with_nested_subjects(self):
+        """Test SubjectConfig with nested subjects"""
+        subject = SubjectConfig(
+            subject_name="html",
+            weight=60,
+            subjects=[
+                SubjectConfig(
+                    subject_name="structure",
+                    weight=40,
+                    tests=[TestConfig(file="index.html", name="has_tag")]
+                )
+            ]
+        )
+        assert subject.subject_name == "html"
+        assert subject.weight == 60
+        assert len(subject.subjects) == 1
+        assert subject.subjects[0].subject_name == "structure"
+
+    def test_subject_config_validation_requires_tests_or_subjects(self):
+        """Test that SubjectConfig requires at least tests or subjects"""
+        with pytest.raises(ValueError, match="must have at least 'tests' or 'subjects'"):
+            SubjectConfig(subject_name="invalid", weight=50)
+
+    def test_subject_config_with_both_tests_and_subjects_requires_subjects_weight(self):
+        """Test that having both tests and subjects requires subjects_weight"""
+        with pytest.raises(ValueError, match="needs 'subjects_weight' defined"):
+            SubjectConfig(
+                subject_name="mixed",
+                weight=50,
+                tests=[TestConfig(file="index.html", name="has_tag")],
+                subjects=[SubjectConfig(
+                    subject_name="nested",
+                    weight=30,
+                    tests=[TestConfig(file="index.html", name="has_tag")]
+                )]
+            )
+
+    def test_subject_config_with_both_tests_and_subjects_with_weight(self):
+        """Test that having both tests and subjects works with subjects_weight"""
+        subject = SubjectConfig(
+            subject_name="mixed",
+            weight=50,
+            tests=[TestConfig(file="index.html", name="has_tag")],
+            subjects=[SubjectConfig(
+                subject_name="nested",
+                weight=30,
+                tests=[TestConfig(file="index.html", name="has_tag")]
+            )],
+            subjects_weight=60
+        )
+        assert subject.subject_name == "mixed"
+        assert subject.subjects_weight == 60
+
+
+class TestCategoryConfig:
+    """Test CategoryConfig model"""
+
+    def test_category_config_with_subjects(self):
+        """Test CategoryConfig with subjects"""
+        category = CategoryConfig(
+            weight=100,
+            subjects=[
+                SubjectConfig(
+                    subject_name="html",
+                    weight=60,
+                    tests=[TestConfig(file="index.html", name="has_tag")]
+                )
+            ]
+        )
+        assert category.weight == 100
+        assert len(category.subjects) == 1
+        assert category.subjects[0].subject_name == "html"
+
+    def test_category_config_validation_requires_tests_or_subjects(self):
+        """Test that CategoryConfig requires at least tests or subjects"""
+        with pytest.raises(ValueError, match="must have at least 'tests' or 'subjects'"):
+            CategoryConfig(weight=100)
+
+    def test_category_config_with_both_tests_and_subjects_requires_subjects_weight(self):
+        """Test that having both tests and subjects requires subjects_weight"""
+        with pytest.raises(ValueError, match="needs 'subjects_weight' defined"):
+            CategoryConfig(
+                weight=100,
+                tests=[TestConfig(file="index.html", name="has_tag")],
+                subjects=[SubjectConfig(
+                    subject_name="nested",
+                    weight=30,
+                    tests=[TestConfig(file="index.html", name="has_tag")]
+                )]
+            )
+
+
+class TestCriteriaConfig:
+    """Test CriteriaConfig model"""
+
+    def test_criteria_config_basic(self):
+        """Test basic CriteriaConfig creation"""
+        criteria = CriteriaConfig(
+            test_library="web_dev",
+            base=CategoryConfig(
+                weight=100,
+                subjects=[
+                    SubjectConfig(
+                        subject_name="html",
+                        weight=60,
+                        tests=[TestConfig(file="index.html", name="has_tag")]
+                    )
+                ]
+            )
+        )
+        assert criteria.test_library == "web_dev"
+        assert criteria.base.weight == 100
+        assert criteria.bonus is None
+        assert criteria.penalty is None
+
+    def test_criteria_config_with_all_categories(self):
+        """Test CriteriaConfig with base, bonus, and penalty"""
+        criteria = CriteriaConfig(
+            test_library="web_dev",
+            base=CategoryConfig(
+                weight=100,
+                subjects=[
+                    SubjectConfig(
+                        subject_name="html",
+                        weight=60,
+                        tests=[TestConfig(file="index.html", name="has_tag")]
+                    )
+                ]
+            ),
+            bonus=CategoryConfig(
+                weight=40,
+                subjects=[
+                    SubjectConfig(
+                        subject_name="accessibility",
+                        weight=20,
+                        tests=[TestConfig(file="index.html", name="check_all_images_have_alt")]
+                    )
+                ]
+            ),
+            penalty=CategoryConfig(
+                weight=50,
+                subjects=[
+                    SubjectConfig(
+                        subject_name="html",
+                        weight=50,
+                        tests=[TestConfig(file="index.html", name="check_bootstrap_usage")]
+                    )
+                ]
+            )
+        )
+        assert criteria.test_library == "web_dev"
+        assert criteria.base.weight == 100
+        assert criteria.bonus.weight == 40
+        assert criteria.penalty.weight == 50
+
+
+class TestSchemaIntegration:
+    """Integration tests with the actual criteria_schema.json file"""
+
+    def test_parse_full_schema(self, criteria_config):
+        """Test that the full schema parses successfully"""
+        assert isinstance(criteria_config, CriteriaConfig)
+        assert criteria_config.test_library == "web_dev"
+
+    def test_base_category_parsed(self, criteria_config):
+        """Test that base category is parsed correctly"""
+        assert criteria_config.base is not None
+        assert criteria_config.base.weight == 100
+        assert len(criteria_config.base.subjects) == 2
+
+    def test_html_subject_structure(self, criteria_config):
+        """Test HTML subject structure"""
+        html_subject = criteria_config.base.subjects[0]
+        assert html_subject.subject_name == "html"
+        assert html_subject.weight == 60
+        assert len(html_subject.subjects) == 2  # structure and link
+
+    def test_html_structure_subject(self, criteria_config):
+        """Test HTML structure subject"""
+        html_subject = criteria_config.base.subjects[0]
+        structure_subject = html_subject.subjects[0]
+        assert structure_subject.subject_name == "structure"
+        assert structure_subject.weight == 40
+        assert len(structure_subject.tests) == 12
+
+    def test_html_link_subject(self, criteria_config):
+        """Test HTML link subject"""
+        html_subject = criteria_config.base.subjects[0]
+        link_subject = html_subject.subjects[1]
+        assert link_subject.subject_name == "link"
+        assert link_subject.weight == 20
+        assert len(link_subject.tests) == 2
+
+    def test_css_subject_structure(self, criteria_config):
+        """Test CSS subject structure"""
+        css_subject = criteria_config.base.subjects[1]
+        assert css_subject.subject_name == "css"
+        assert css_subject.weight == 40
+        assert len(css_subject.subjects) == 2  # responsivity and style
+
+    def test_css_responsivity_subject(self, criteria_config):
+        """Test CSS responsivity subject"""
+        css_subject = criteria_config.base.subjects[1]
+        responsivity_subject = css_subject.subjects[0]
+        assert responsivity_subject.subject_name == "responsivity"
+        assert responsivity_subject.weight == 50
+        assert len(responsivity_subject.tests) == 3
+
+    def test_css_style_subject(self, criteria_config):
+        """Test CSS style subject"""
+        css_subject = criteria_config.base.subjects[1]
+        style_subject = css_subject.subjects[1]
+        assert style_subject.subject_name == "style"
+        assert style_subject.weight == 50
+        assert len(style_subject.tests) == 7
+
+    def test_bonus_category_parsed(self, criteria_config):
+        """Test that bonus category is parsed correctly"""
+        assert criteria_config.bonus is not None
+        assert criteria_config.bonus.weight == 40
+        assert len(criteria_config.bonus.subjects) == 2
+
+    def test_accessibility_bonus_subject(self, criteria_config):
+        """Test accessibility bonus subject"""
+        accessibility_subject = criteria_config.bonus.subjects[0]
+        assert accessibility_subject.subject_name == "accessibility"
+        assert accessibility_subject.weight == 20
+        assert len(accessibility_subject.tests) == 1
+
+    def test_head_detail_bonus_subject(self, criteria_config):
+        """Test head_detail bonus subject"""
+        head_detail_subject = criteria_config.bonus.subjects[1]
+        assert head_detail_subject.subject_name == "head_detail"
+        assert head_detail_subject.weight == 80
+        assert len(head_detail_subject.tests) == 7
+
+    def test_penalty_category_parsed(self, criteria_config):
+        """Test that penalty category is parsed correctly"""
+        assert criteria_config.penalty is not None
+        assert criteria_config.penalty.weight == 50
+        assert len(criteria_config.penalty.subjects) == 2
+
+    def test_html_penalty_subject(self, criteria_config):
+        """Test HTML penalty subject"""
+        html_penalty_subject = criteria_config.penalty.subjects[0]
+        assert html_penalty_subject.subject_name == "html"
+        assert html_penalty_subject.weight == 50
+        assert len(html_penalty_subject.tests) == 6
+
+    def test_project_structure_penalty_subject(self, criteria_config):
+        """Test project_structure penalty subject"""
+        project_structure_subject = criteria_config.penalty.subjects[1]
+        assert project_structure_subject.subject_name == "project_structure"
+        assert project_structure_subject.weight == 50
+        assert len(project_structure_subject.tests) == 3
+
+    def test_test_config_structure(self, criteria_config):
+        """Test that test configs are structured correctly"""
+        # Get first test from structure subject
+        html_subject = criteria_config.base.subjects[0]
+        structure_subject = html_subject.subjects[0]
+        first_test = structure_subject.tests[0]
+
+        assert first_test.file == "index.html"
+        assert first_test.name == "has_tag"
+        assert len(first_test.parameters) == 2
+        assert first_test.parameters[0].name == "tag"
+        assert first_test.parameters[0].value == "body"
+        assert first_test.parameters[1].name == "required_count"
+        assert first_test.parameters[1].value == 1
+
+    def test_test_without_parameters(self, criteria_config):
+        """Test parsing of tests without parameters"""
+        # Get check_css_linked test from link subject
+        html_subject = criteria_config.base.subjects[0]
+        link_subject = html_subject.subjects[1]
+        check_css_test = link_subject.tests[0]
+
+        assert check_css_test.file == "index.html"
+        assert check_css_test.name == "check_css_linked"
+        assert check_css_test.parameters is None or len(check_css_test.parameters) == 0
+
+    def test_parameter_value_types(self, criteria_config):
+        """Test that parameter values maintain correct types"""
+        html_subject = criteria_config.base.subjects[0]
+        structure_subject = html_subject.subjects[0]
+
+        # Check string value
+        tag_param = structure_subject.tests[0].parameters[0]
+        assert isinstance(tag_param.value, str)
+
+        # Check integer value
+        count_param = structure_subject.tests[0].parameters[1]
+        assert isinstance(count_param.value, int)
+
+    def test_from_json_method(self, criteria_schema_path):
+        """Test parsing from JSON string"""
+        with open(criteria_schema_path, 'r') as f:
+            json_str = f.read()
+
+        criteria = CriteriaConfig.from_json(json_str)
+        assert isinstance(criteria, CriteriaConfig)
+        assert criteria.test_library == "web_dev"
+
+    def test_from_dict_method(self, criteria_schema_dict):
+        """Test parsing from dictionary"""
+        criteria = CriteriaConfig.from_dict(criteria_schema_dict)
+        assert isinstance(criteria, CriteriaConfig)
+        assert criteria.test_library == "web_dev"
+
+    def test_round_trip_serialization(self, criteria_config):
+        """Test that we can serialize and deserialize the config"""
+        # Convert to dict
+        config_dict = criteria_config.model_dump()
+
+        # Parse back from dict
+        reparsed = CriteriaConfig.from_dict(config_dict)
+
+        # Verify they match
+        assert reparsed.test_library == criteria_config.test_library
+        assert reparsed.base.weight == criteria_config.base.weight
+        assert len(reparsed.base.subjects) == len(criteria_config.base.subjects)
+
+    def test_weight_validation(self):
+        """Test that weight validation works"""
+        with pytest.raises(ValueError):
+            SubjectConfig(
+                subject_name="invalid",
+                weight=150,  # Over 100
+                tests=[TestConfig(file="test.html", name="test")]
+            )
+
+        with pytest.raises(ValueError):
+            SubjectConfig(
+                subject_name="invalid",
+                weight=-10,  # Negative
+                tests=[TestConfig(file="test.html", name="test")]
+            )
+
+    def test_extra_fields_forbidden(self):
+        """Test that extra fields are forbidden at all levels"""
+        # Test at criteria level
+        with pytest.raises(ValueError):
+            CriteriaConfig(
+                test_library="web_dev",
+                base=CategoryConfig(
+                    weight=100,
+                    subjects=[
+                        SubjectConfig(
+                            subject_name="html",
+                            weight=60,
+                            tests=[TestConfig(file="index.html", name="has_tag")]
+                        )
+                    ]
+                ),
+                extra_field="not allowed"
+            )
+
+
+class TestWeightCalculations:
+    """Test weight-related calculations and validations"""
+
+    def test_subject_weights_sum(self, criteria_config):
+        """Verify that subject weights in base category sum correctly"""
+        base_subjects = criteria_config.base.subjects
+        total_weight = sum(subject.weight for subject in base_subjects)
+        assert total_weight == 100  # html=60 + css=40
+
+    def test_nested_subject_weights_sum(self, criteria_config):
+        """Verify that nested subject weights sum correctly"""
+        html_subject = criteria_config.base.subjects[0]
+        nested_subjects = html_subject.subjects
+        total_weight = sum(subject.weight for subject in nested_subjects)
+        assert total_weight == 60  # structure=40 + link=20
+
+        css_subject = criteria_config.base.subjects[1]
+        nested_subjects = css_subject.subjects
+        total_weight = sum(subject.weight for subject in nested_subjects)
+        assert total_weight == 100  # responsivity=50 + style=50
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
+

From 47dcc07e6203a4dc417d3afba1cd464ac208ec3b Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Fri, 23 Jan 2026 22:25:36 -0300
Subject: [PATCH 41/49] refactor: update configuration model imports and field
 names for clarity

---
 tests/unit/test_pipeline_steps.py | 64 +++++++------------------------
 1 file changed, 14 insertions(+), 50 deletions(-)

diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
index 8cd5692..78e890a 100644
--- a/tests/unit/test_pipeline_steps.py
+++ b/tests/unit/test_pipeline_steps.py
@@ -9,6 +9,9 @@
 
 import sys
 from pathlib import Path
+from typing import List
+
+from autograder.models.dataclass.param_description import ParamDescription
 
 # Add project root to path
 project_root = Path(__file__).parent.parent.parent
@@ -16,7 +19,6 @@
 
 from autograder.steps.build_tree_step import BuildTreeStep
 from autograder.steps.grade_step import GradeStep
-from autograder.models.config.criteria import CriteriaConfig
 from autograder.models.dataclass.step_result import StepStatus
 from autograder.models.abstract.template import Template
 from autograder.models.abstract.test_function import TestFunction
@@ -38,14 +40,17 @@ def name(self):
     def description(self):
         return f"Mock test function: {self._test_name}"
 
+    @property
+    def parameter_description(self) -> List[ParamDescription]:
+        return []
+
     def execute(self, *args, **kwargs):
         """Always return a passing result."""
         return TestResult(
             test_name=self._test_name,
-            passed=True,
-            score=100.0,
-            max_score=100.0,
-            message="Test passed (mock)",
+            score=1000,
+            report="Test passed",
+            parameters=None
         )
 
 
@@ -202,8 +207,7 @@ def test_grade_from_tree():
 
     # Create and execute grade step
     grade_step = GradeStep(
-        submission_files=submission_files, submission_id="test_submission_1"
-    )
+        submission_files=submission_files)
 
     result = grade_step.execute(criteria_tree)
 
@@ -224,45 +228,6 @@ def test_grade_from_tree():
 
     return grading_result
 
-
-def test_grade_from_config():
-    """Test that GradeStep can grade directly from config (single submission mode)."""
-    print("\n" + "=" * 80)
-    print("TEST: GradeStep with Template (Single Submission Mode)")
-    print("=" * 80)
-
-    # Create criteria and template
-    criteria = create_simple_criteria()
-    template = MockTemplate()
-    submission_files = create_mock_submission()
-
-    # Create and execute grade step (without building tree first)
-    grade_step = GradeStep(
-        criteria_json=criteria,
-        submission_files=submission_files,
-        submission_id="test_submission_2",
-    )
-
-    result = grade_step.execute(template)
-
-    # Verify result
-    assert result.status == StepStatus.SUCCESS, f"Grade step failed: {result.error}"
-    assert result.data is not None, "GradingResult is None"
-
-    grading_result = result.data
-
-    print("✓ GradeStep successfully graded from config")
-    print(f"  - Final Score: {grading_result.final_score}")
-    print(f"  - Status: {grading_result.status}")
-
-    # Print result tree
-    if grading_result.result_tree:
-        print("\nResult Tree:")
-        grading_result.result_tree.print_tree()
-
-    return grading_result
-
-
 def test_invalid_input_type():
     """Test that GradeStep rejects invalid input types."""
     print("\n" + "=" * 80)
@@ -272,8 +237,7 @@ def test_invalid_input_type():
     submission_files = create_mock_submission()
 
     grade_step = GradeStep(
-        submission_files=submission_files, submission_id="test_submission_3"
-    )
+        submission_files=submission_files)
 
     # Try to execute with invalid input (string)
     result = grade_step.execute("invalid input")
@@ -300,7 +264,7 @@ def run_all_tests():
         grading_result_tree = test_grade_from_tree()
 
         # Test 3: Grade from config (single submission mode)
-        grading_result_config = test_grade_from_config()
+        #grading_result_config = test_grade_from_config()
 
         # Test 4: Invalid input handling
         test_invalid_input_type()
@@ -325,4 +289,4 @@ def run_all_tests():
 
 
 if __name__ == "__main__":
-    run_all_tests()
+    run_all_tests()
\ No newline at end of file

From 2d0d84a4f997a9003ea1cfa0526e597b5b409faf Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Fri, 23 Jan 2026 22:26:02 -0300
Subject: [PATCH 42/49] refactor: update parameters type in TestResult to
 Optional for better clarity

---
 autograder/models/dataclass/test_result.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/autograder/models/dataclass/test_result.py b/autograder/models/dataclass/test_result.py
index d7b80a3..70bc93e 100644
--- a/autograder/models/dataclass/test_result.py
+++ b/autograder/models/dataclass/test_result.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
-from typing import Dict, Any
+from typing import Dict, Any, Optional
+
 
 @dataclass
 class TestResult:
@@ -9,7 +10,7 @@ class TestResult:
     score: int
     report: str
     subject_name: str = ""
-    parameters: Dict[str, Any] = field(default_factory=dict)
+    parameters: Optional[Dict[str, Any]] = field(default_factory=dict)
 
     def get_result(self, *args, **kwargs) :
         return [self]

From 5c3e1a978afd5114a12de07740e0d6f462858fd9 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Sat, 24 Jan 2026 18:04:17 -0300
Subject: [PATCH 43/49] feat: implement full pipeline test for BuildTreeStep
 and GradeStep

---
 tests/unit/test_pipeline_steps.py | 62 ++++++++++++++-----------------
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/tests/unit/test_pipeline_steps.py b/tests/unit/test_pipeline_steps.py
index 78e890a..ac7e931 100644
--- a/tests/unit/test_pipeline_steps.py
+++ b/tests/unit/test_pipeline_steps.py
@@ -12,6 +12,7 @@
 from typing import List
 
 from autograder.models.dataclass.param_description import ParamDescription
+from autograder.pipeline import AutograderPipeline
 
 # Add project root to path
 project_root = Path(__file__).parent.parent.parent
@@ -250,43 +251,34 @@ def test_invalid_input_type():
     print(f"  - Error: {result.error}")
 
 
-def run_all_tests():
-    """Run all unit tests."""
-    print("\n" + "#" * 80)
-    print("# RUNNING PIPELINE STEPS UNIT TESTS")
-    print("#" * 80)
-
-    try:
-        # Test 1: Build tree
-        criteria_tree = test_build_tree_step()
-
-        # Test 2: Grade from tree (multi-submission mode)
-        grading_result_tree = test_grade_from_tree()
-
-        # Test 3: Grade from config (single submission mode)
-        #grading_result_config = test_grade_from_config()
-
-        # Test 4: Invalid input handling
-        test_invalid_input_type()
+def test_build_tree_and_grade_pipeline():
+    """Test full pipeline: BuildTreeStep followed by GradeStep."""
+    print("\n" + "=" * 80)
+    print("TEST: Full Pipeline (BuildTreeStep + GradeStep)")
+    print("=" * 80)
 
-        print("\n" + "#" * 80)
-        print("# ALL TESTS PASSED! ✓")
-        print("#" * 80)
+    # Create criteria and template
+    criteria = create_simple_criteria()
+    template = MockTemplate()
+    submission_files = create_mock_submission()
 
-    except AssertionError as e:
-        print("\n" + "#" * 80)
-        print(f"# TEST FAILED: {e}")
-        print("#" * 80)
-        raise
-    except Exception as e:
-        print("\n" + "#" * 80)
-        print(f"# UNEXPECTED ERROR: {e}")
-        print("#" * 80)
-        import traceback
+    # Build tree
+    build_step = BuildTreeStep(criteria)
+    # Grade submission
+    grade_step = GradeStep(
+        submission_files=submission_files)
 
-        traceback.print_exc()
-        raise
+    pipeline = AutograderPipeline()
+    pipeline.add_step(build_step)
+    pipeline.add_step(grade_step)
+    grading_result = pipeline.run(input_data=template)
 
+    # Verify result
+    assert grading_result.status == "success", f"Pipeline failed: {grading_result.error}"
+    print("✓ Full pipeline successfully built tree and graded submission")
+    print(f"  - Final Score: {grading_result.final_score}")
 
-if __name__ == "__main__":
-    run_all_tests()
\ No newline at end of file
+    # Print result tree
+    if grading_result.result_tree:
+        print("\nResult Tree:")
+        grading_result.result_tree.print_tree()
\ No newline at end of file

From cbb625064212077782ce7d99a9fcbf8546155c66 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 26 Jan 2026 23:16:47 -0300
Subject: [PATCH 44/49] feat: introduce PipelineExecution class to manage
 pipeline execution and step results

---
 .../models/dataclass/pipeline_execution.py    | 33 +++++++++++++++++++
 autograder/models/dataclass/step_result.py    | 12 ++++++-
 connectors/__init__.py                        |  0
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 autograder/models/dataclass/pipeline_execution.py
 delete mode 100644 connectors/__init__.py

diff --git a/autograder/models/dataclass/pipeline_execution.py b/autograder/models/dataclass/pipeline_execution.py
new file mode 100644
index 0000000..8622eef
--- /dev/null
+++ b/autograder/models/dataclass/pipeline_execution.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+from typing import List
+
+from autograder.models.dataclass.step_result import StepResult, StepName
+from autograder.models.dataclass.submission import Submission
+
+
+@dataclass
+class PipelineExecution:
+    """
+    Main object of the autograder pipeline, keeps track of the execution and step results.
+
+    Attributes:
+        step_results (list): A list of StepResult objects representing the results of each step in the pipeline.
+        assignment_id (str): The unique identifier for the assignment being graded.
+        submission (Submission): The submission being processed in the pipeline.
+    """
+    step_results: List[StepResult]
+    assignment_id: str
+    submission: Submission
+
+    def add_step_result(self, step_result: StepResult) -> 'PipelineExecution':
+        self.step_results.append(step_result)
+        return self
+
+    def get_step_result(self, step_name: StepName) -> StepResult:
+        for step_result in self.step_results:
+            if step_result.step == step_name:
+                return step_result
+        raise ValueError(f"Step {step_name} was not executed in the pipeline.")
+
+    def get_previous_step(self):
+        return self.step_results[-1] if self.step_results else None
diff --git a/autograder/models/dataclass/step_result.py b/autograder/models/dataclass/step_result.py
index a64686b..dfdf9c1 100644
--- a/autograder/models/dataclass/step_result.py
+++ b/autograder/models/dataclass/step_result.py
@@ -10,12 +10,22 @@ class StepStatus(Enum):
     FAIL = "fail"
 
 
+class StepName(Enum):
+    BOOTSTRAP = "BootstrapStep"
+    LOAD_TEMPLATE = "LoadTemplateStep"
+    BUILD_TREE = "BuildTreeStep"
+    PRE_FLIGHT = "PreFlightStep"
+    GRADE = "GradeStep"
+    FEEDBACK = "FeedbackStep"
+    EXPORTER = "ExporterStep"
+
+
 @dataclass
 class StepResult(Generic[T]):
+    step: StepName
     data: T
     status: StepStatus = StepStatus.SUCCESS
     error: Optional[str] = None
-    failed_at_step: Optional[str] = None
     original_input: Any = None
 
     @property
diff --git a/connectors/__init__.py b/connectors/__init__.py
deleted file mode 100644
index e69de29..0000000

From 05216865af56da6acbaef25c14198a7ae425c820 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 26 Jan 2026 23:17:30 -0300
Subject: [PATCH 45/49] feat: update step execution methods to use
 PipelineExecution as input type

---
 autograder/steps/build_tree_step.py    | 22 ++++++-------
 autograder/steps/export_step.py        | 21 ++++++------
 autograder/steps/feedback_step.py      |  5 +--
 autograder/steps/grade_step.py         | 44 +++++++++++---------------
 autograder/steps/load_template_step.py |  3 +-
 autograder/steps/pre_flight_step.py    | 36 +++++++++++----------
 6 files changed, 67 insertions(+), 64 deletions(-)

diff --git a/autograder/steps/build_tree_step.py b/autograder/steps/build_tree_step.py
index 100aa17..3ca7bbc 100644
--- a/autograder/steps/build_tree_step.py
+++ b/autograder/steps/build_tree_step.py
@@ -1,9 +1,8 @@
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
 from autograder.services.criteria_tree_service import CriteriaTreeService
-from autograder.models.criteria_tree import CriteriaTree
 from autograder.models.abstract.step import Step
-from autograder.models.abstract.template import Template
 from autograder.models.config.criteria import CriteriaConfig
-from autograder.models.dataclass.step_result import StepResult, StepStatus
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
 
 
 class BuildTreeStep(Step):
@@ -24,7 +23,7 @@ def __init__(self, criteria_json: dict):
         self._criteria_json = criteria_json
         self._criteria_tree_service = CriteriaTreeService()
 
-    def execute(self, input: Template) -> StepResult[CriteriaTree]:
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
         """
         Build a criteria tree from the configuration and template.
 
@@ -37,24 +36,25 @@ def execute(self, input: Template) -> StepResult[CriteriaTree]:
         try:
             # Validate criteria configuration
             criteria_config = CriteriaConfig.from_dict(self._criteria_json)
-
+            template = input.get_step_result(StepName.LOAD_TEMPLATE).data
             # Build the criteria tree with embedded test functions
             criteria_tree = self._criteria_tree_service.build_tree(
                 criteria_config,
-                input
+                template
             )
 
-            return StepResult(
+            return input.add_step_result(StepResult(
+                step="BuildTreeStep",
                 data=criteria_tree,
                 status=StepStatus.SUCCESS,
                 original_input=input
-            )
+            ))
 
         except Exception as e:
-            return StepResult(
+            return input.add_step_result(StepResult(
+                step="BuildTreeStep",
                 data=None,
                 status=StepStatus.FAIL,
                 error=f"Failed to build criteria tree: {str(e)}",
-                failed_at_step=self.__class__.__name__,
                 original_input=input
-            )
+            ))
diff --git a/autograder/steps/export_step.py b/autograder/steps/export_step.py
index 245cd18..75e00aa 100644
--- a/autograder/steps/export_step.py
+++ b/autograder/steps/export_step.py
@@ -1,28 +1,31 @@
 from autograder.models.abstract.step import Step
-from autograder.models.dataclass.step_result import StepResult, StepStatus
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
+
 
 class ExporterStep(Step):
     def __init__(self, remote_driver):
         self._remote_driver = remote_driver # UpstashDriver
-    def execute(self, input) -> StepResult:
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
         try:
             # Extract username and score from input
-            username = input.username
-            score = input.score
+            username = input.submission.username
+            score = input.get_step_result(StepName.GRADE).data.final_score
 
             # Set the score using UpstashDriver
             self._remote_driver.set_score(username, score)
 
             # Return success result
-            return StepResult(
+            return input.add_step_result(StepResult(
+                step=StepName.EXPORTER,
                 data={"username": username, "score": score},
                 status=StepStatus.SUCCESS
-            )
+            ))
         except Exception as e:
             # Return failure result
-            return StepResult(
+            return input.add_step_result(StepResult(
+                step=StepName.EXPORTER,
                 data=None,
                 status=StepStatus.FAIL,
                 error=str(e),
-                failed_at_step="ExporterStep"
-            )
+            ))
diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
index 185d39d..8c38526 100644
--- a/autograder/steps/feedback_step.py
+++ b/autograder/steps/feedback_step.py
@@ -1,5 +1,6 @@
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.abstract.step import Step
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
 from autograder.services.report.reporter_service import ReporterService
 
 
@@ -10,8 +11,8 @@ def __init__(self,
         self._reporter_service = reporter_service
         self._feedback_config = feedback_config
 
-    def execute(self, input: GradingResult) -> GradingResult:
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
         """Adds feedback to the grading result using the reporter service."""
         feedback = self._reporter_service.generate_feedback()
         input.feedback = feedback
-        return input
+        return
diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py
index 2d6f3cf..e152c91 100644
--- a/autograder/steps/grade_step.py
+++ b/autograder/steps/grade_step.py
@@ -1,7 +1,7 @@
-from typing import Dict, Any, Optional
-from autograder.models.criteria_tree import CriteriaTree
+
 from autograder.models.dataclass.grading_result import GradingResult
-from autograder.models.dataclass.step_result import StepResult, StepStatus
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
 from autograder.models.abstract.step import Step
 from autograder.services.grader_service import GraderService
 
@@ -16,9 +16,7 @@ class GradeStep(Step):
     """
 
     def __init__(
-        self,
-        submission_files: Dict[str, Any],
-    ):
+        self    ):
         """
         Initialize the grade step.
 
@@ -26,23 +24,23 @@ def __init__(
             criteria_json: Raw criteria configuration (only needed for single submission mode)
             submission_files: Student submission files
         """
-        self._submission_files = submission_files
         self._grader_service = GraderService()
 
-    def execute(self, input: CriteriaTree) -> StepResult[GradingResult]:
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
         """
         Grade a submission based on the input type.
 
         Args:
-            input: Either a CriteriaTree (multi-submission mode) or Template (single submission mode)
+            _input: Either a CriteriaTree (multi-submission mode) or Template (single submission mode)
 
         Returns:
             StepResult containing GradingResult with scores and result tree
         """
         try:
+            criteria_tree = input.get_step_result(StepName.BUILD_TREE).data
             result_tree = self._grader_service.grade_from_tree(
-                criteria_tree=input,
-                submission_files=self._submission_files
+                criteria_tree=criteria_tree,
+                submission_files=input.submission.submission_files
             )
 
             # Create grading result
@@ -52,23 +50,19 @@ def execute(self, input: CriteriaTree) -> StepResult[GradingResult]:
                 final_score=final_score, status="success", result_tree=result_tree
             )
 
-            return StepResult(
-                data=grading_result, status=StepStatus.SUCCESS, original_input=input
-            )
+            return input.add_step_result(StepResult(
+                step=StepName.GRADE,
+                data=grading_result,
+                status=StepStatus.SUCCESS,
+                original_input=input
+            ))
 
         except Exception as e:
             # Return error result
-            grading_result = GradingResult(
-                final_score=0.0,
-                status="error",
-                error=f"Grading failed: {str(e)}",
-                failed_at_step=self.__class__.__name__,
-            )
-
-            return StepResult(
-                data=grading_result,
+            return input.add_step_result(StepResult(
+                step="GradeStep",
+                data=None,
                 status=StepStatus.FAIL,
                 error=str(e),
-                failed_at_step=self.__class__.__name__,
                 original_input=input,
-            )
+            ))
diff --git a/autograder/steps/load_template_step.py b/autograder/steps/load_template_step.py
index b9dece4..cea948f 100644
--- a/autograder/steps/load_template_step.py
+++ b/autograder/steps/load_template_step.py
@@ -1,3 +1,4 @@
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
 from autograder.services.template_library_service import TemplateLibraryService
 from autograder.models.abstract.step import Step
 
@@ -8,5 +9,5 @@ def __init__(self, template_name: str, custom_template = None):
         self._custom_template = custom_template
         self._template_service = TemplateLibraryService()
 
-    def execute(self, input):
+    def execute(self, input) -> PipelineExecution:
         pass
\ No newline at end of file
diff --git a/autograder/steps/pre_flight_step.py b/autograder/steps/pre_flight_step.py
index 76f0785..dd0e450 100644
--- a/autograder/steps/pre_flight_step.py
+++ b/autograder/steps/pre_flight_step.py
@@ -1,5 +1,6 @@
 from autograder.models.abstract.step import Step
-from autograder.models.dataclass.step_result import StepResult, StepStatus
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
 from autograder.services.pre_flight_service import PreFlightService
 
 
@@ -18,47 +19,50 @@ def __init__(self, setup_config):
         self._setup_config = setup_config
         self._pre_flight_service = PreFlightService(setup_config)
 
-    def execute(self, input) -> StepResult:
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
         """
         Execute pre-flight checks on the submission.
 
         Args:
-            input: Submission data (typically file list or submission object)
+            input: PipelineExecution containing submission data
 
         Returns:
             StepResult with status SUCCESS if all checks pass, FAIL otherwise
         """
         # Check required files first
+        submission_files = input.submission.submission_files
         if self._setup_config.get('required_files'):
-            files_ok = self._pre_flight_service.check_required_files(input)
+            files_ok = self._pre_flight_service.check_required_files(submission_files)
             if not files_ok:
                 # File check failed, don't continue to setup commands
-                return StepResult(
-                    data=input,
-                    status=StepStatus.FAIL,
-                    error=self._format_errors(),
-                    failed_at_step=self.__class__.__name__,
-                    original_input=input
-                )
+                return input.add_step_result(StepResult(
+                        step=StepName.PRE_FLIGHT,
+                        data=input,
+                        status=StepStatus.FAIL,
+                        error=self._format_errors(),
+                        original_input=input
+                        ))
+
 
         # Check setup commands only if file check passed
         if self._setup_config.get('setup_commands'):
             setup_ok = self._pre_flight_service.check_setup_commands()
             if not setup_ok:
-                return StepResult(
+                return input.add_step_result(StepResult(
+                    step=StepName.PRE_FLIGHT,
                     data=input,
                     status=StepStatus.FAIL,
                     error=self._format_errors(),
-                    failed_at_step=self.__class__.__name__,
                     original_input=input
-                )
+                ))
 
         # All checks passed
-        return StepResult(
+        return input.add_step_result(StepResult(
+            step=StepName.PRE_FLIGHT,
             data=input,
             status=StepStatus.SUCCESS,
             original_input=input
-        )
+        ))
 
     def _format_errors(self) -> str:
         """Format all preflight errors into a single error message."""

From f83e06bf5499852f67a77bf3e0336cc707d89467 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 26 Jan 2026 23:19:14 -0300
Subject: [PATCH 46/49] refactor: remove unused parameters from autograder
 pipeline configuration

---
 autograder/autograder.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/autograder/autograder.py b/autograder/autograder.py
index 0c7de6b..b6f2fb3 100644
--- a/autograder/autograder.py
+++ b/autograder/autograder.py
@@ -16,8 +16,7 @@ def build_pipeline(
                  feedback_config,
                  setup_config = None,
                  custom_template = None,
-                 feedback_mode = None,
-                 submission_files = None):
+                 feedback_mode = None):
     """
     Build an autograder pipeline based on configuration.
 
@@ -29,8 +28,6 @@ def build_pipeline(
         setup_config: Pre-flight setup configuration
         custom_template: Custom template object (if any)
         feedback_mode: Mode for feedback generation
-        submission_files: Student submission files
-        submission_id: Optional submission identifier
     Returns:
         Configured AutograderPipeline
     """
@@ -45,9 +42,7 @@ def build_pipeline(
     if setup_config:
         pipeline.add_step(PreFlightStep(setup_config))
 
-    pipeline.add_step(GradeStep(
-        submission_files=submission_files,
-    )) # Generates GradingResult with final score and result tree
+    pipeline.add_step(GradeStep()) # Generates GradingResult with final score and result tree
 
     # Feedback generation (if configured)
     if include_feedback:
@@ -60,7 +55,3 @@ def build_pipeline(
     return pipeline
 
 
-
-
-
-

From 893d27805fd150747bf4f453e93c607e0026624c Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 26 Jan 2026 23:24:14 -0300
Subject: [PATCH 47/49] feat: enhance pipeline execution flow with
 PipelineExecution management

---
 autograder/pipeline.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/autograder/pipeline.py b/autograder/pipeline.py
index 0753031..2edde5f 100644
--- a/autograder/pipeline.py
+++ b/autograder/pipeline.py
@@ -1,6 +1,7 @@
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.abstract.step import Step
-from autograder.models.dataclass.step_result import StepResult, StepStatus
+from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepResult, StepStatus, StepName
 
 
 class AutograderPipeline:
@@ -11,20 +12,29 @@ def add_step(self, step: Step) -> None:
         self._steps.append(step)
 
     def run(self, input_data:'Submission'):
-        result = StepResult(data=input_data, status=StepStatus.SUCCESS, original_input=input_data) #Initialize result object with input data
-        print(result)
+        result = StepResult(
+            step=StepName.BOOTSTRAP,
+            data=input_data,
+            status=StepStatus.SUCCESS)
+        #Initialize result object with input data
+        pipeline_execution = PipelineExecution(step_results=[], assignment_id="assignment_123", submission=input_data) #Example assignment_id
+        pipeline_execution.add_step_result(result)
+
         for step in self._steps:
             print("Executing step:", step.__class__.__name__)
-            if not result.is_successful:
+            if not result.get_previous_step.is_successful:
                 break
             try:
-                result = step.execute(result.data)
+                result = step.execute(result)
             except Exception as e:
-                result.error = str(e)
-                result.status = StepStatus.FAIL
-                result.failed_at_step = step.__class__.__name__
-
-        if not result.is_successful:
+                StepResult(
+                    step=step.__class__.__name__,
+                    data=None,
+                    status=StepStatus.FAIL,
+                    error=str(e),
+                )
+
+        if not result.is_successful: #Change this to report a PipelineExecution error with result details
             return GradingResult( #Maybe return a ErrorResponse object?
                 final_score=0.0,
                 status="error",
@@ -34,7 +44,7 @@ def run(self, input_data:'Submission'):
                 failed_at_step=result.failed_at_step,
             )
         else:
-            return result.data # Assuming the final step returns a GradingResult
+            return result.get_step_result(StepName.GRADE).data # How to return with feedback? How to know when there's no feedback?
 
 
 

From 1c939ef80f47fd30926f070aaedf39df4cd68935 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Mon, 26 Jan 2026 23:35:02 -0300
Subject: [PATCH 48/49] feat: update load_template_step to use
 PipelineExecution in execute method

---
 autograder/steps/load_template_step.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/autograder/steps/load_template_step.py b/autograder/steps/load_template_step.py
index cea948f..9d2d133 100644
--- a/autograder/steps/load_template_step.py
+++ b/autograder/steps/load_template_step.py
@@ -9,5 +9,10 @@ def __init__(self, template_name: str, custom_template = None):
         self._custom_template = custom_template
         self._template_service = TemplateLibraryService()
 
-    def execute(self, input) -> PipelineExecution:
-        pass
\ No newline at end of file
+    def execute(self, input: PipelineExecution) -> PipelineExecution:
+        if self._custom_template:
+            return self._template_service.load_custom_template(self._custom_template) #TODO: Implement Custom Template Loading with Sandboxed Env
+        else:
+            return self._template_service.load_builtin_template(self._template_name) # Load built-in template similar to custom to avoid code duplication
+
+

From c5cbceda36fe2afc06a45bd223b96fac16eda397 Mon Sep 17 00:00:00 2001
From: ArthurCRodrigues <arthurcarvalhorodrigues2409@gmail.com>
Date: Tue, 27 Jan 2026 07:07:50 -0300
Subject: [PATCH 49/49] feat: enhance feedback_step to generate feedback using
 grading result

---
 autograder/steps/feedback_step.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/autograder/steps/feedback_step.py b/autograder/steps/feedback_step.py
index 8c38526..0ec27a4 100644
--- a/autograder/steps/feedback_step.py
+++ b/autograder/steps/feedback_step.py
@@ -1,6 +1,7 @@
 from autograder.models.dataclass.grading_result import GradingResult
 from autograder.models.abstract.step import Step
 from autograder.models.dataclass.pipeline_execution import PipelineExecution
+from autograder.models.dataclass.step_result import StepName
 from autograder.services.report.reporter_service import ReporterService
 
 
@@ -13,6 +14,9 @@ def __init__(self,
 
     def execute(self, input: PipelineExecution) -> PipelineExecution:
         """Adds feedback to the grading result using the reporter service."""
-        feedback = self._reporter_service.generate_feedback()
-        input.feedback = feedback
-        return
+        try:
+            result_tree = input.get_step_result(StepName.GRADE).data
+            feedback = self._reporter_service.generate_feedback(
+                grading_result=result_tree,
+                feedback_config=self._feedback_config
+            ) #TODO: Implement generate_feedback method @joaovitoralvarenga