diff --git a/pantheon/factory/templates/prompts/delegation.md b/pantheon/factory/templates/prompts/delegation.md index fd6d61a72..63c6c0fc1 100644 --- a/pantheon/factory/templates/prompts/delegation.md +++ b/pantheon/factory/templates/prompts/delegation.md @@ -91,3 +91,4 @@ call_agent( ```python call_agent("researcher", "Do analysis fast.") ``` + diff --git a/pantheon/factory/templates/teams/default.md b/pantheon/factory/templates/teams/default.md index 76aa2be6a..5ceadf74c 100644 --- a/pantheon/factory/templates/teams/default.md +++ b/pantheon/factory/templates/teams/default.md @@ -88,10 +88,12 @@ call_agent("researcher", "Search the web for best practices on X. Gather informa - Data analysis, EDA, statistical analysis - Literature review and multi-source research +**Scientific writing gate (MANDATORY):** Before writing any report, paper, or document that requires domain knowledge or citations, you MUST first delegate a research task to `researcher`. Writing without a prior research delegation is not allowed for these task types. + #### Scientific Illustrator -**Delegate for:** Scientific diagrams, publication-quality visualizations, complex figures -**Execute directly:** Simple chart embedding, displaying existing charts +**Delegate for:** Schematic diagrams, conceptual illustrations, architecture diagrams, publication-quality figures — tasks where the output is a conceptual diagram, not a data-driven chart. +**Execute directly (or via Researcher):** Data visualizations, statistical plots, charts derived from analysis results. ### Decision Summary @@ -100,9 +102,11 @@ call_agent("researcher", "Search the web for best practices on X. 
Gather informa | Explore/read/understand codebase | **MUST delegate** to researcher | | Web search or documentation lookup | **MUST delegate** to researcher | | Data analysis or research | **MUST delegate** to researcher | +| Scientific writing (report/paper) | **MUST delegate research first**, then write | | Multiple independent research tasks | **MUST parallelize** with multiple researchers | +| Schematic/pathway/cell diagrams | **Delegate** to scientific_illustrator | | Read 1 known file | Execute directly | -| Write/edit/create files | Execute directly | +| Write/edit/create files (post-research) | Execute directly | | Synthesize researcher results | Execute directly (your core role) | {{delegation}} diff --git a/pantheon/toolsets/file/file_manager.py b/pantheon/toolsets/file/file_manager.py index 24da1a3c9..f08534adf 100644 --- a/pantheon/toolsets/file/file_manager.py +++ b/pantheon/toolsets/file/file_manager.py @@ -816,35 +816,52 @@ async def write_file( file_path: str, content: str = "", overwrite: bool = True, + append: bool = False, ) -> dict: - """Use this tool to CREATE NEW file. + """Create a new file, overwrite an existing one, or append to it. - This tool writes content to a file, automatically creating parent - directories if they do not exist. + Parent directories are created automatically if they do not exist. - IMPORTANT: For EDITING existing file, use `update_file` instead. - DO NOT rewrite entire file when only small changes are needed, its is wasteful and error-prone. + For EDITING existing files, prefer `update_file` instead — it is + safer and more efficient for partial modifications. 
Use this tool when: - Creating a brand new file - - Completely rewriting a file from scratch (rare) + - Completely rewriting a file from scratch + - Appending content to an existing file (set append=True) - DO NOT use this tool when: - - Making partial modifications to an existing file - - Changing a few lines in a large file - - For these cases, use `update_file` instead + Do NOT use this tool when: + - Making partial modifications to an existing file (use `update_file`) + - Changing a few lines in a large file (use `update_file`) Args: file_path: The path to the file to write. content: The content to write to the file. - overwrite: When False, abort if the target file already exists. - Default is True, but consider using update_file for edits. + overwrite: When False, abort if the target file already exists (ignored when append=True). + append: When True, append content to the end of an existing file instead of overwriting. + The file must already exist when using append mode. Returns: dict: Success status or error message. """ target_path = self._resolve_path(file_path) + + if append: + if not target_path.exists(): + return { + "success": False, + "error": f"File '{file_path}' does not exist. 
Use write_file without append=True to create it first.", + "reason": "file_not_found", + } + try: + with open(target_path, "a", encoding="utf-8") as f: + f.write(content) + return {"success": True, "appended_chars": len(content)} + except Exception as exc: + logger.error(f"write_file(append) failed for {file_path}: {exc}") + return {"success": False, "error": str(exc)} + if not overwrite and target_path.exists(): return { "success": False, diff --git a/pantheon/utils/adapters/openai_adapter.py b/pantheon/utils/adapters/openai_adapter.py index bdedd2ede..e343b9975 100644 --- a/pantheon/utils/adapters/openai_adapter.py +++ b/pantheon/utils/adapters/openai_adapter.py @@ -73,7 +73,6 @@ def _normalize_response_format(response_format: Any) -> Any: pass return response_format - class OpenAIAdapter(BaseAdapter): """Adapter for OpenAI and OpenAI-compatible APIs.""" diff --git a/pantheon/utils/llm.py b/pantheon/utils/llm.py index 3e70b1e70..575105f8e 100644 --- a/pantheon/utils/llm.py +++ b/pantheon/utils/llm.py @@ -241,6 +241,7 @@ async def acompletion_responses( """ from openai import AsyncOpenAI from .llm_providers import get_proxy_kwargs + from .provider_registry import get_model_info, get_output_token_param # ========== Build client ========== proxy_kwargs = get_proxy_kwargs() @@ -257,7 +258,19 @@ async def acompletion_responses( # ========== Convert inputs ========== instructions, input_items = _convert_messages_to_responses_input(messages) converted_tools = _convert_tools_for_responses(tools) - extra_params = _convert_model_params_for_responses(model_params) + response_model_params = dict(model_params or {}) + if not any( + key in response_model_params + for key in ("max_tokens", "max_completion_tokens", "max_output_tokens") + ): + try: + max_out = get_model_info(model).get("max_output_tokens") + token_param = get_output_token_param(model, api_mode="responses") + if token_param and max_out and max_out > 0: + response_model_params[token_param] = max_out + except 
Exception: + pass + extra_params = _convert_model_params_for_responses(response_model_params) # ========== Build kwargs ========== kwargs: dict[str, Any] = { @@ -553,7 +566,13 @@ async def acompletion( - Uses native SDK adapters (openai, anthropic, google-genai) """ from .llm_providers import get_proxy_kwargs - from .provider_registry import find_provider_for_model, get_provider_config, completion_cost + from .provider_registry import ( + find_provider_for_model, + get_provider_config, + completion_cost, + get_model_info, + get_output_token_param, + ) from .adapters import get_adapter logger.debug(f"[ACOMPLETION] Starting LLM call | Model={model}") @@ -562,6 +581,23 @@ async def acompletion( provider_key, model_name, provider_config = find_provider_for_model(model) sdk_type = provider_config.get("sdk", "openai") + # ========== Ensure output token limit is set from the catalog ========== + # Different vendors use different parameter names for the same concept. + # The catalog records the preferred parameter name; we use it here so the + # first request is correct for known providers/models. 
+ model_params = dict(model_params or {}) + if not any( + key in model_params + for key in ("max_tokens", "max_completion_tokens", "max_output_tokens") + ): + try: + max_out = get_model_info(model).get("max_output_tokens") + token_param = get_output_token_param(model, api_mode="chat") + if token_param and max_out and max_out > 0: + model_params[token_param] = max_out + except Exception: + pass # Fall through to provider default + # ========== Mode Detection & Configuration ========== proxy_kwargs = get_proxy_kwargs() if proxy_kwargs: diff --git a/pantheon/utils/llm_catalog.json b/pantheon/utils/llm_catalog.json index 6f1d47e9e..71f5dc9d4 100644 --- a/pantheon/utils/llm_catalog.json +++ b/pantheon/utils/llm_catalog.json @@ -7,6 +7,8 @@ "base_url": "https://api.openai.com/v1", "api_key_env": "OPENAI_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_completion_tokens", + "responses_output_token_param": "max_output_tokens", "models": { "gpt-5.4-pro": { "max_input_tokens": 1000000, @@ -184,6 +186,22 @@ "supports_computer_use": false, "supports_assistant_prefill": false }, + "gpt-4o-mini": { + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_million": 0.15, + "output_cost_per_million": 0.6, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": false, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_web_search": false, + "supports_pdf_input": false, + "supports_computer_use": false, + "supports_assistant_prefill": false + }, "o3-pro": { "max_input_tokens": 200000, "max_output_tokens": 100000, @@ -307,6 +325,7 @@ "base_url": "https://api.anthropic.com", "api_key_env": "ANTHROPIC_API_KEY", "openai_compatible": false, + "chat_output_token_param": "max_tokens", "models": { "claude-opus-4-6": { "max_input_tokens": 1000000, @@ -428,6 +447,7 @@ "base_url": "https://generativelanguage.googleapis.com", "api_key_env": "GEMINI_API_KEY", 
"openai_compatible": false, + "chat_output_token_param": "max_output_tokens", "models": { "gemini-3.1-pro-preview": { "max_input_tokens": 2000000, @@ -560,6 +580,7 @@ "base_url": "https://api.deepseek.com/v1", "api_key_env": "DEEPSEEK_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "deepseek-chat": { "max_input_tokens": 131072, @@ -601,6 +622,7 @@ "base_url": "https://open.bigmodel.cn/api/paas/v4", "api_key_env": "ZAI_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "glm-5": { "max_input_tokens": 131072, @@ -706,6 +728,7 @@ "base_url": "https://api.minimax.io/v1", "api_key_env": "MINIMAX_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "MiniMax-M2.7": { "max_input_tokens": 1000000, @@ -795,6 +818,7 @@ "base_url": "https://api.moonshot.ai/v1", "api_key_env": "MOONSHOT_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "kimi-k2.5": { "max_input_tokens": 131072, @@ -836,6 +860,7 @@ "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", "api_key_env": "DASHSCOPE_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "qwen3-235b-a22b": { "max_input_tokens": 131072, @@ -989,6 +1014,7 @@ "base_url": "https://api.groq.com/openai/v1", "api_key_env": "GROQ_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_completion_tokens", "models": { "openai/gpt-oss-120b": { "max_input_tokens": 131072, @@ -1110,6 +1136,7 @@ "base_url": "https://openrouter.ai/api/v1", "api_key_env": "OPENROUTER_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "anthropic/claude-sonnet-4-6": { "max_input_tokens": 1000000, @@ -1183,6 +1210,7 @@ "base_url": "https://api.mistral.ai/v1", "api_key_env": "MISTRAL_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "mistral-large-latest": { 
"max_input_tokens": 262144, @@ -1272,6 +1300,7 @@ "base_url": "https://api.together.xyz/v1", "api_key_env": "TOGETHER_API_KEY", "openai_compatible": true, + "chat_output_token_param": "max_tokens", "models": { "Qwen/Qwen3.5-397B-A17B": { "max_input_tokens": 262144, @@ -1346,6 +1375,7 @@ "api_key_env": "", "openai_compatible": false, "auth_mode": "oauth", + "responses_output_token_param": "max_output_tokens", "models": { "gpt-5.4": { "max_input_tokens": 1000000, @@ -1406,6 +1436,7 @@ "api_key_env": "", "openai_compatible": true, "local": true, + "chat_output_token_param": "max_tokens", "models": {} } } diff --git a/pantheon/utils/provider_registry.py b/pantheon/utils/provider_registry.py index 5d92596bd..8f8c6bd7b 100644 --- a/pantheon/utils/provider_registry.py +++ b/pantheon/utils/provider_registry.py @@ -34,7 +34,6 @@ "supports_assistant_prefill": False, } - @lru_cache(maxsize=1) def load_catalog() -> dict: """Load and cache the provider catalog from llm_catalog.json.""" @@ -98,6 +97,21 @@ def get_provider_config(provider: str) -> dict: return catalog.get("providers", {}).get(provider, {}) +def get_output_token_param(model: str, api_mode: str = "chat") -> str | None: + """Return the provider/model-specific output token parameter name. + + Args: + model: Model string, e.g. ``openai/gpt-5.4`` or ``gpt-4o-mini``. + api_mode: ``chat`` for chat/completions style APIs, ``responses`` for + OpenAI Responses-style APIs. + """ + _provider_key, _model_name, provider_config = find_provider_for_model(model) + if api_mode == "responses": + return provider_config.get("responses_output_token_param") + + return provider_config.get("chat_output_token_param") + + # ============ Model Metadata ============ diff --git a/scripts/test_two_phase_live.py b/scripts/test_two_phase_live.py new file mode 100644 index 000000000..a68d96591 --- /dev/null +++ b/scripts/test_two_phase_live.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +"""Live integration test: long paper + long code file writing. 
+ +Verifies that after removing size guards, the LLM can write large files +directly without truncation (root cause fixed by max_tokens auto-detection). + +Requires: OPENAI_API_KEY + +Usage: + OPENAI_API_KEY=sk-... python scripts/test_two_phase_live.py +""" + +from __future__ import annotations + +import asyncio +import os +import sys +import tempfile +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + + +async def run_scenario(name, task, make_checks, model="openai/gpt-4.1-mini"): + from pantheon.agent import Agent + from pantheon.toolsets.file import FileManagerToolSet + + with tempfile.TemporaryDirectory() as tmpdir: + print(f"\n{'─' * 70}") + print(f" Scenario: {name}") + print(f" Model: {model}") + print(f"{'─' * 70}") + + fm = FileManagerToolSet("file_manager", tmpdir) + agent = Agent( + name="writer", + model=model, + instructions=( + "You are a skilled developer and writer. " + "Use file tools (write_file, update_file, read_file) to complete tasks. " + "Write complete, production-quality content — do NOT leave stubs or placeholders." 
+ ), + ) + await agent.toolset(fm) + + calls = [] + rejections = 0 + + async def log(msg): + nonlocal rejections + if msg.get("role") == "assistant": + for tc in msg.get("tool_calls", []) or []: + fn = tc.get("function", {}) + tool_name = fn.get("name", "?").replace("file_manager__", "") + args_len = len(fn.get("arguments", "")) + calls.append(tool_name) + print(f" {tool_name} ({args_len:,} chars)") + elif msg.get("role") == "tool": + c = str(msg.get("content", "")) + if "content_too_large" in c: + rejections += 1 + print(f" -> REJECTED") + + resp = await agent.run( + [{"role": "user", "content": task}], + process_step_message=log, + use_memory=False, + ) + + # Build and run checks + checks = make_checks(tmpdir) + print() + all_pass = True + for check_name, check_fn in checks: + try: + result = check_fn(tmpdir, calls, rejections) + status = "PASS" if result else "FAIL" + if not result: + all_pass = False + except Exception as e: + status = f"FAIL ({e})" + all_pass = False + print(f" [{status}] {check_name}") + + # Show file sizes + for f in Path(tmpdir).rglob("*"): + if f.is_file(): + content = f.read_text(errors="replace") + print(f"\n {f.name}: {len(content):,} chars, {len(content.splitlines())} lines") + + print(f"\n Tool calls: {len(calls)} total, {rejections} rejected") + print(f" Sequence: {' -> '.join(calls[:15])}{'...' if len(calls) > 15 else ''}") + return all_pass + + +async def main(): + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + print("SKIP: OPENAI_API_KEY not set") + sys.exit(0) + + print("=" * 70) + print(" Live File Writing Test (no size guards)") + print("=" * 70) + + results = [] + + # ── Scenario 1: Long LaTeX paper ── + paper_task = ( + "Write a complete LaTeX review paper to 'review.tex' about " + "single-cell RNA sequencing analysis methods. 
Requirements:\n" + "- \\documentclass{article} with proper packages\n" + "- Abstract (100+ words)\n" + "- Introduction (200+ words)\n" + "- Methods section covering: quality control, normalization, " + "dimensionality reduction, clustering, differential expression (300+ words total)\n" + "- Results (150+ words)\n" + "- Discussion (150+ words)\n" + "- Bibliography with at least 10 \\bibitem references\n" + "Write EVERYTHING in a single write_file call to review.tex." + ) + + def make_paper_checks(tmpdir): + p = Path(tmpdir) / "review.tex" + def r(): return p.read_text() if p.exists() else "" + return [ + ("File created", lambda *_: p.exists()), + ("File > 5000 chars", lambda *_: len(r()) > 5000), + ("Has \\documentclass", lambda *_: "\\documentclass" in r()), + ("Has Introduction", lambda *_: "Introduction" in r()), + ("Has Methods", lambda *_: "Methods" in r()), + ("Has Discussion", lambda *_: "Discussion" in r()), + ("Has 10+ bibitem", lambda *_: r().count("\\bibitem") >= 10), + ("No rejections", lambda tmpdir, calls, rej: rej == 0), + ] + + r = await run_scenario("Long LaTeX Paper (single write_file)", paper_task, make_paper_checks) + results.append(("Paper", r)) + + # ── Scenario 2: Long Python code ── + code_task = ( + "Write a complete Python file 'data_pipeline.py' that implements:\n" + "1. A DataLoader class with methods: load_csv, load_json, load_parquet, validate_schema " + "(each with full implementation using pandas, proper docstrings, type hints, error handling)\n" + "2. A DataTransformer class with methods: normalize, filter_outliers, " + "encode_categorical, impute_missing (each fully implemented)\n" + "3. A DataExporter class with methods: to_csv, to_json, to_parquet, to_sql " + "(each fully implemented)\n" + "4. A Pipeline class that chains DataLoader -> DataTransformer -> DataExporter " + "with a run() method, logging, and error handling\n" + "5. 
A if __name__ == '__main__' block with example usage\n" + "Write EVERYTHING in a single write_file call. Every method must have " + "a real implementation (no pass, no TODO, no placeholders)." + ) + + def make_code_checks(tmpdir): + p = Path(tmpdir) / "data_pipeline.py" + def r(): return p.read_text() if p.exists() else "" + return [ + ("File created", lambda *_: p.exists()), + ("File > 3000 chars", lambda *_: len(r()) > 3000), + ("Has DataLoader", lambda *_: "class DataLoader" in r()), + ("Has DataTransformer", lambda *_: "class DataTransformer" in r()), + ("Has DataExporter", lambda *_: "class DataExporter" in r()), + ("Has Pipeline", lambda *_: "class Pipeline" in r()), + ("Has __main__", lambda *_: "__main__" in r()), + ("No rejections", lambda tmpdir, calls, rej: rej == 0), + ] + + r = await run_scenario("Long Python Code (single write_file)", code_task, make_code_checks) + results.append(("Code", r)) + + # ── Summary ── + print(f"\n{'=' * 70}") + print(" Summary") + print(f"{'=' * 70}") + for name, passed in results: + print(f" {name}: {'PASS' if passed else 'FAIL'}") + print(f"{'=' * 70}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/test_agent.py b/tests/test_agent.py index 34f61d1ad..6e4a5945d 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,4 +1,5 @@ import asyncio +import json import random from pathlib import Path from typing import List @@ -187,8 +188,19 @@ def get_weather(city: str, unit: str = "celsius"): """Get the weather of a city.""" return {"weather": "sunny", "temperature": 20} - resp = await agent.run("What is the weather in Palo Alto?") - print(resp.content) + sync_tool_messages = await agent._handle_tool_calls( + tool_calls=[{ + "id": "call_sync_weather", + "function": { + "name": "get_weather", + "arguments": json.dumps({"city": "Palo Alto", "unit": "celsius"}), + }, + }], + context_variables={}, + timeout=agent.tool_timeout, + ) + assert sync_tool_messages + assert "sunny" in 
sync_tool_messages[0]["content"].lower() agent.functions.clear() @@ -201,9 +213,22 @@ async def get_weather(city: str, unit: str = "celsius"): nonlocal flag flag = False - resp = await agent.run("What is the weather in Palo Alto?") - assert flag, "Tool should have timed out but it completed execution" - print(resp) + tool_messages = await agent._handle_tool_calls( + tool_calls=[{ + "id": "call_async_weather", + "function": { + "name": "get_weather", + "arguments": json.dumps({"city": "Palo Alto", "unit": "celsius"}), + }, + }], + context_variables={}, + timeout=agent.tool_timeout, + ) + assert tool_messages + bg_tasks = agent._bg_manager.list_tasks() + assert bg_tasks, "Timed out tool should be adopted into background execution" + assert bg_tasks[0].source == "timeout" + assert flag, "Tool coroutine should continue in background instead of blocking the foreground call" async def test_agent_transfer(): diff --git a/tests/test_file_manager.py b/tests/test_file_manager.py index 1e73eda00..3c2eb6cdb 100644 --- a/tests/test_file_manager.py +++ b/tests/test_file_manager.py @@ -3,6 +3,8 @@ from tempfile import TemporaryDirectory from pantheon.toolsets.file import FileManagerToolSet +HAS_OPENAI = bool(os.environ.get("OPENAI_API_KEY")) + @pytest.fixture def temp_toolset(): """Create a FileManagerToolSet with a temporary directory.""" @@ -510,3 +512,105 @@ async def test_manage_path_comprehensive(temp_toolset): result = await temp_toolset.manage_path("delete", "nonexistent.txt") assert result["success"] is False assert "does not exist" in result["error"] + + +# --------------------------------------------------------------------------- +# write_file append mode + large content tests +# --------------------------------------------------------------------------- + +async def test_write_file_append_basic(temp_toolset): + """write_file(append=True) appends to existing file.""" + await temp_toolset.write_file("log.txt", "header\n") + res = await temp_toolset.write_file("log.txt", 
"line1\nline2\n", append=True) + assert res["success"] + assert res["appended_chars"] == len("line1\nline2\n") + content = (await temp_toolset.read_file("log.txt"))["content"] + assert content == "header\nline1\nline2\n" + + +async def test_write_file_append_multiple_batches(temp_toolset): + """write_file(append=True) supports multiple sequential appends.""" + await temp_toolset.write_file("refs.bib", "% Bibliography\n") + for i in range(5): + batch = f"@article{{ref{i},\n title={{Title {i}}},\n}}\n\n" + res = await temp_toolset.write_file("refs.bib", batch, append=True) + assert res["success"], f"Batch {i} failed: {res}" + content = (await temp_toolset.read_file("refs.bib"))["content"] + assert content.startswith("% Bibliography\n") + assert content.count("@article{") == 5 + + +async def test_write_file_append_rejects_nonexistent(temp_toolset): + """write_file(append=True) rejects when file does not exist.""" + res = await temp_toolset.write_file("missing.txt", "data", append=True) + assert not res["success"] + assert res["reason"] == "file_not_found" + + +async def test_write_file_large_content(temp_toolset): + """write_file accepts large content (no size guards — root cause fixed at LLM layer).""" + big = "x" * 100_000 + res = await temp_toolset.write_file("big.txt", big) + assert res["success"] + assert (temp_toolset.path / "big.txt").read_text() == big + + +async def test_update_file_large_new_string(temp_toolset): + """update_file accepts large new_string (no size guards).""" + await temp_toolset.write_file("doc.txt", "PLACEHOLDER\n") + big = "y" * 50_000 + res = await temp_toolset.update_file("doc.txt", "PLACEHOLDER", big) + assert res["success"] + content = (await temp_toolset.read_file("doc.txt"))["content"] + assert big in content + + +async def test_write_file_append_large_content(temp_toolset): + """write_file(append=True) accepts large content (no size guards).""" + await temp_toolset.write_file("base.txt", "start\n") + big = "z" * 50_000 + res = await 
temp_toolset.write_file("base.txt", big, append=True) + assert res["success"] + content = (await temp_toolset.read_file("base.txt"))["content"] + assert content == "start\n" + big + + +# --------------------------------------------------------------------------- +# max_tokens auto-detection (PR #55 — 7920a72) +# --------------------------------------------------------------------------- + +def test_max_tokens_auto_set(): + """acompletion must auto-set max_tokens from model's max_output_tokens + when not explicitly provided (prevents Anthropic 4096 default truncation).""" + from pantheon.utils.provider_registry import get_model_info + + # Anthropic model — the original failure case + info = get_model_info("anthropic/claude-3-haiku-20240307") + max_out = info.get("max_output_tokens", 0) + assert max_out > 4096, ( + f"Expected max_output_tokens > 4096 for claude-3-haiku, got {max_out}" + ) + + # OpenAI model + info = get_model_info("openai/gpt-4.1-mini") + max_out = info.get("max_output_tokens", 0) + assert max_out > 0, f"Expected max_output_tokens > 0 for gpt-4.1-mini, got {max_out}" + + +@pytest.mark.skipif(not HAS_OPENAI, reason="OPENAI_API_KEY not set") +async def test_max_tokens_live_openai(): + """Live test: acompletion sets max_tokens automatically, preventing truncation.""" + from pantheon.utils.llm_providers import call_llm_provider, detect_provider + + provider_config = detect_provider("openai/gpt-4.1-mini", False) + # Call with a simple prompt, no explicit max_tokens in model_params + message = await call_llm_provider( + config=provider_config, + messages=[ + {"role": "system", "content": "Reply with exactly: OK"}, + {"role": "user", "content": "Say OK"}, + ], + ) + assert isinstance(message, dict) + content = message.get("content", "") + assert len(content) > 0, "Expected non-empty response" diff --git a/tests/test_provider_adapters.py b/tests/test_provider_adapters.py index 8046ab00f..55edea244 100644 --- a/tests/test_provider_adapters.py +++ 
b/tests/test_provider_adapters.py @@ -24,6 +24,7 @@ load_catalog, find_provider_for_model, get_model_info, + get_output_token_param, completion_cost, models_by_provider, token_counter, @@ -70,6 +71,11 @@ def test_get_model_info_known(self): assert info["max_input_tokens"] == 1_000_000 assert info["supports_vision"] is True + def test_get_model_info_openai_gpt_4o_mini(self): + info = get_model_info("gpt-4o-mini") + assert info["max_input_tokens"] == 128_000 + assert info["max_output_tokens"] == 16_384 + def test_get_model_info_unknown_returns_defaults(self): info = get_model_info("fake/nonexistent-model") assert info["max_input_tokens"] == 200_000 @@ -86,6 +92,15 @@ def test_models_by_provider_qwen(self): models = models_by_provider("qwen") assert len(models) == 9 + def test_output_token_param_catalog(self): + assert get_output_token_param("openai/gpt-5.4") == "max_completion_tokens" + assert get_output_token_param("anthropic/claude-sonnet-4-6") == "max_tokens" + assert get_output_token_param("gemini/gemini-2.5-flash") == "max_output_tokens" + assert get_output_token_param("deepseek/deepseek-chat") == "max_tokens" + assert get_output_token_param("minimax/MiniMax-M2.5") == "max_tokens" + assert get_output_token_param("groq/llama-3.3-70b-versatile") == "max_completion_tokens" + assert get_output_token_param("codex/gpt-5.4", api_mode="responses") == "max_output_tokens" + def test_token_counter_basic(self): count = token_counter(model="gpt-4", messages=[{"role": "user", "content": "Hello"}]) assert count > 0 @@ -107,6 +122,50 @@ def test_all_default_models_in_catalog(self): assert missing == [], f"Models in selector but not in catalog: {missing}" +@pytest.mark.asyncio +async def test_llm_uses_catalog_output_param_for_openai(monkeypatch): + from pantheon.utils import llm as llm_module + from pantheon.utils import adapters as adapters_module + + captured = {} + + class DummyAdapter: + async def acompletion(self, **kwargs): + captured.update(kwargs) + return [ + { + 
"choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": "ok"}, + "finish_reason": "stop", + } + ], + "model": kwargs["model"], + }, + { + "usage": { + "prompt_tokens": 1, + "completion_tokens": 1, + "total_tokens": 2, + }, + "choices": [], + }, + ] + + monkeypatch.setattr(adapters_module, "get_adapter", lambda _sdk: DummyAdapter()) + + resp = await llm_module.acompletion( + messages=[{"role": "user", "content": "hello"}], + model="openai/gpt-5.4", + model_params={}, + ) + + assert resp.choices[0].message.content == "ok" + assert captured["max_completion_tokens"] == 64000 + assert "max_tokens" not in captured + + # ============ stream_chunk_builder unit tests ============