From dd36aaa04e9f0ed0e5a92b535ead2be4ae38de56 Mon Sep 17 00:00:00 2001 From: Hackall <36754621+hackall360@users.noreply.github.com> Date: Sat, 27 Sep 2025 06:26:45 -0700 Subject: [PATCH 1/3] Add LM Studio backend support for new models --- README.md | 17 +- codex-rs/core/src/chat_completions.rs | 27 +++- codex-rs/core/src/openai_tools.rs | 2 +- .../core/tests/chat_completions_payload.rs | 153 ++++++++++++++---- codex-rs/exec/src/cli.rs | 3 +- codex-rs/exec/tests/suite/lmstudio.rs | 107 +++++++++++- codex-rs/lmstudio/src/lib.rs | 91 +++++++---- codex-rs/tui/src/cli.rs | 3 +- docs/getting-started.md | 4 +- 9 files changed, 317 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index 132552ca3ed..2b5a9cf155a 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ Codex can run fully locally by delegating inference to [LM Studio](https://lmstu 3. Run Codex with the LM Studio backend: ```shell - # Interactive session using the default LLaMA 3.1 8B Instruct model + # Interactive session using the default DevStral Small (LLaMA architecture) build codex --backend lmstudio # Explicitly pick one of the supported architectures @@ -88,15 +88,18 @@ Codex can run fully locally by delegating inference to [LM Studio](https://lmstu Codex understands the following architecture aliases when `--backend lmstudio` is selected: -| Alias | LM Studio model identifier | -| ---------- | --------------------------------------------------- | -| `llama` | `meta-llama/Meta-Llama-3.1-8B-Instruct` | -| `qwen2` | `Qwen/Qwen2-7B-Instruct` | -| `qwen3` | `Qwen/Qwen3-7B-Instruct` | -| `qwen3-moe`| `Qwen/Qwen3-MoE-A2.7B-Instruct` | +| Alias | LM Studio model identifier | +| -------------- | ------------------------------------- | +| `llama` | `mistralai/devstral-small-2507` | +| `qwen2` | `qwen/qwen2.5-coder-14b` | +| `qwen3` | `qwen/qwen3-4b-2507` | +| `qwen3-moe` | `qwen/qwen3-coder-30b` | +| `qwen3-moe-a3b`| `qwen/qwen3-30b-a3b-2507` | You can also pass the exact LM Studio identifier (for example `my-org/custom-model`) if you are running a different checkpoint. Codex verifies that the requested model is available from LM Studio and surfaces clear errors when it is not. +Structured output (`--output-schema`) works with LM Studio; Codex automatically forwards your schema using the OpenAI-compatible `response_format` field so local models can emit strict JSON. 
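For reference, the forwarded schema follows the OpenAI structured-output convention. A minimal sketch of the object Codex builds (the helper name `response_format_for` is illustrative and not part of this patch; the real code lives in `chat_completions.rs` below):

```rust
use serde_json::{Value, json};

/// Sketch: wrap a (sanitized) JSON Schema in the OpenAI-compatible
/// `response_format` object that Codex attaches to LM Studio chat requests.
fn response_format_for(schema: &Value) -> Value {
    json!({
        "type": "json_schema",
        "json_schema": {
            "name": "codex_output_schema",
            "schema": schema,
            "strict": true,
        }
    })
}
```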
+ --- ### Docs & FAQ diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index eddc7864845..a8fe1700b59 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -23,6 +23,7 @@ use crate::error::CodexErr; use crate::error::Result; use crate::model_family::ModelFamily; use crate::openai_tools::create_tools_json_for_chat_completions_api; +use crate::openai_tools::sanitize_json_schema; use crate::util::backoff; use codex_protocol::models::ContentItem; use codex_protocol::models::ReasoningItemContent; @@ -35,12 +36,6 @@ pub(crate) async fn stream_chat_completions( client: &reqwest::Client, provider: &ModelProviderInfo, ) -> Result { - if prompt.output_schema.is_some() { - return Err(CodexErr::UnsupportedOperation( - "output_schema is not supported for Chat Completions API".to_string(), - )); - } - // Build messages array let mut messages = Vec::::new(); @@ -274,13 +269,31 @@ pub(crate) async fn stream_chat_completions( } let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?; - let payload = json!({ + let mut payload = json!({ "model": model_family.slug, "messages": messages, "stream": true, "tools": tools_json, }); + if let Some(schema) = &prompt.output_schema + && let Some(obj) = payload.as_object_mut() + { + let mut sanitized_schema = schema.clone(); + sanitize_json_schema(&mut sanitized_schema); + obj.insert( + "response_format".to_string(), + json!({ + "type": "json_schema", + "json_schema": { + "name": "codex_output_schema", + "schema": sanitized_schema, + "strict": true, + } + }), + ); + } + debug!( "POST to {}: {}", provider.get_full_url(&None), diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index 48dca796719..30d3bada797 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -369,7 +369,7 @@ pub(crate) fn mcp_tool_to_openai_tool( /// and otherwise defaults to "string". /// - Fills required child fields (e.g. array items, object properties) with /// permissive defaults when absent. -fn sanitize_json_schema(value: &mut JsonValue) { +pub(crate) fn sanitize_json_schema(value: &mut JsonValue) { match value { JsonValue::Bool(_) => { // JSON Schema boolean form: true/false. Coerce to an accept-all string. 
diff --git a/codex-rs/core/tests/chat_completions_payload.rs b/codex-rs/core/tests/chat_completions_payload.rs index ba3fe9de791..0596b8c3062 100644 --- a/codex-rs/core/tests/chat_completions_payload.rs +++ b/codex-rs/core/tests/chat_completions_payload.rs @@ -12,6 +12,7 @@ use codex_core::ResponseItem; use codex_core::WireApi; use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_protocol::mcp_protocol::ConversationId; +use codex_protocol::models::FunctionCallOutputPayload; use core_test_support::load_default_config_for_test; use futures::StreamExt; use serde_json::Value; @@ -26,7 +27,7 @@ fn network_disabled() -> bool { std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() } -async fn run_request(input: Vec) -> Value { +async fn run_request(input: Vec, output_schema: Option) -> Value { let server = MockServer::start().await; let template = ResponseTemplate::new(200) @@ -81,6 +82,7 @@ async fn run_request(input: Vec) -> Value { let mut prompt = Prompt::default(); prompt.input = input; + prompt.output_schema = output_schema; let mut stream = match client.stream(&prompt).await { Ok(s) => s, @@ -180,7 +182,7 @@ async fn omits_reasoning_when_none_present() { return; } - let body = run_request(vec![user_message("u1"), assistant_message("a1")]).await; + let body = run_request(vec![user_message("u1"), assistant_message("a1")], None).await; let messages = messages_from(&body); let assistant = first_assistant(&messages); @@ -197,11 +199,14 @@ async fn attaches_reasoning_to_previous_assistant() { return; } - let body = run_request(vec![ - user_message("u1"), - assistant_message("a1"), - reasoning_item("rA"), - ]) + let body = run_request( + vec![ + user_message("u1"), + assistant_message("a1"), + reasoning_item("rA"), + ], + None, + ) .await; let messages = messages_from(&body); let assistant = first_assistant(&messages); @@ -219,11 +224,10 @@ async fn attaches_reasoning_to_function_call_anchor() { return; } - let body = run_request(vec![ - user_message("u1"), - reasoning_item("rFunc"), - function_call(), - ]) + let body = run_request( + vec![user_message("u1"), reasoning_item("rFunc"), function_call()], + None, + ) .await; let messages = messages_from(&body); let assistant = first_assistant(&messages); @@ -246,11 +250,14 @@ async fn attaches_reasoning_to_local_shell_call() { return; } - let body = run_request(vec![ - user_message("u1"), - reasoning_item("rShell"), - local_shell_call(), - ]) + let body = run_request( + vec![ + user_message("u1"), + reasoning_item("rShell"), + local_shell_call(), + ], + None, + ) .await; let messages = messages_from(&body); let assistant = first_assistant(&messages); @@ -271,11 +278,14 @@ async fn drops_reasoning_when_last_role_is_user() { return; } - let body = run_request(vec![ - assistant_message("aPrev"), - reasoning_item("rHist"), - user_message("uNew"), - ]) + let body = run_request( + vec![ + assistant_message("aPrev"), + reasoning_item("rHist"), + user_message("uNew"), + ], + None, + ) .await; let messages = messages_from(&body); assert!(messages.iter().all(|msg| msg.get("reasoning").is_none())); @@ -290,12 +300,15 @@ async fn ignores_reasoning_before_last_user() { return; } - let body = run_request(vec![ - user_message("u1"), - assistant_message("a1"), - user_message("u2"), - reasoning_item("rAfterU1"), - ]) + let body = run_request( + vec![ + user_message("u1"), + assistant_message("a1"), + user_message("u2"), + reasoning_item("rAfterU1"), + ], + None, + ) .await; let messages = messages_from(&body); 
assert!(messages.iter().all(|msg| msg.get("reasoning").is_none())); @@ -310,12 +323,15 @@ async fn skips_empty_reasoning_segments() { return; } - let body = run_request(vec![ - user_message("u1"), - assistant_message("a1"), - reasoning_item(""), - reasoning_item(" "), - ]) + let body = run_request( + vec![ + user_message("u1"), + assistant_message("a1"), + reasoning_item(""), + reasoning_item(" "), + ], + None, + ) .await; let messages = messages_from(&body); let assistant = first_assistant(&messages); @@ -331,7 +347,11 @@ async fn suppresses_duplicate_assistant_messages() { return; } - let body = run_request(vec![assistant_message("dup"), assistant_message("dup")]).await; + let body = run_request( + vec![assistant_message("dup"), assistant_message("dup")], + None, + ) + .await; let messages = messages_from(&body); let assistant_messages: Vec<_> = messages .iter() @@ -343,3 +363,66 @@ async fn suppresses_duplicate_assistant_messages() { Value::String("dup".into()) ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn includes_response_format_when_schema_present() { + if network_disabled() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + let schema = serde_json::json!({ + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"] + }); + + let body = run_request(vec![user_message("u1")], Some(schema.clone())).await; + let expected = serde_json::json!({ + "type": "json_schema", + "json_schema": { + "name": "codex_output_schema", + "schema": schema, + "strict": true, + } + }); + + assert_eq!(body.get("response_format"), Some(&expected)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn includes_tool_call_outputs() { + if network_disabled() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + let body = run_request( + vec![ + user_message("u1"), + function_call(), + ResponseItem::FunctionCallOutput { + call_id: "c1".into(), + output: FunctionCallOutputPayload { + content: "done".into(), + success: Some(true), + }, + }, + ], + None, + ) + .await; + + let messages = messages_from(&body); + let tool_message = messages + .iter() + .find(|msg| msg["role"] == "tool") + .expect("tool message missing"); + + assert_eq!(tool_message["tool_call_id"], Value::String("c1".into())); + assert_eq!(tool_message["content"], Value::String("done".into())); +} diff --git a/codex-rs/exec/src/cli.rs b/codex-rs/exec/src/cli.rs index 07065a402de..bd3868c960b 100644 --- a/codex-rs/exec/src/cli.rs +++ b/codex-rs/exec/src/cli.rs @@ -15,7 +15,8 @@ pub struct Cli { #[arg(long = "image", short = 'i', value_name = "FILE", value_delimiter = ',', num_args = 1..)] pub images: Vec, - /// Model the agent should use. + /// Model the agent should use. For LM Studio backends, pass architecture aliases such as + /// `llama`, `qwen2`, `qwen3`, or `qwen3-moe`. 
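+    /// (Illustrative mapping from the alias table in `codex-lmstudio`: `llama` →
+    /// `mistralai/devstral-small-2507`, `qwen3-moe` → `qwen/qwen3-coder-30b`.)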
#[arg(long, short = 'm')] pub model: Option, diff --git a/codex-rs/exec/tests/suite/lmstudio.rs b/codex-rs/exec/tests/suite/lmstudio.rs index fc1eaa6f29e..92c5e611146 100644 --- a/codex-rs/exec/tests/suite/lmstudio.rs +++ b/codex-rs/exec/tests/suite/lmstudio.rs @@ -11,13 +11,18 @@ use wiremock::http::Method; use wiremock::matchers::method; use wiremock::matchers::path; +use codex_lmstudio::DEFAULT_LM_STUDIO_MODEL; + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn exec_resolves_lmstudio_model_aliases() -> anyhow::Result<()> { let cases = [ - ("llama", "meta-llama/Meta-Llama-3.1-8B-Instruct"), - ("qwen2", "Qwen/Qwen2-7B-Instruct"), - ("qwen3", "Qwen/Qwen3-7B-Instruct"), - ("qwen3-moe", "Qwen/Qwen3-MoE-A2.7B-Instruct"), + ("llama", DEFAULT_LM_STUDIO_MODEL), + ("devstral", DEFAULT_LM_STUDIO_MODEL), + ("qwen2", "qwen/qwen2.5-coder-14b"), + ("qwen3", "qwen/qwen3-4b-2507"), + ("qwen3-moe", "qwen/qwen3-coder-30b"), + ("qwen3moe", "qwen/qwen3-coder-30b"), + ("qwen3-moe-a3b", "qwen/qwen3-30b-a3b-2507"), ]; for (alias, expected_model) in cases { @@ -104,3 +109,97 @@ async fn exec_resolves_lmstudio_model_aliases() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn exec_includes_output_schema_for_lmstudio_requests() -> anyhow::Result<()> { + let test = test_codex_exec(); + + let schema_contents = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + let schema_path = test.cwd_path().join("lmstudio-schema.json"); + std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?; + + let server = responses::start_mock_server().await; + let models_payload = serde_json::json!({ + "data": [ + { "id": DEFAULT_LM_STUDIO_MODEL } + ] + }); + + Mock::given(method("GET")) + .and(path("/v1/models")) + .respond_with(ResponseTemplate::new(200).set_body_json(models_payload)) + .expect(1) + .mount(&server) + .await; + + let chat_stream = concat!( + "data: {\"choices\":[{\"delta\":{\"content\":\"ok\"}}]}\n\n", + "data: {\"choices\":[{\"delta\":{}}]}\n\n", + "data: [DONE]\n\n", + ); + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(chat_stream, "text/event-stream"), + ) + .expect(1) + .mount(&server) + .await; + + test.cmd() + .env("CODEX_LM_STUDIO_BASE_URL", format!("{}/v1", server.uri())) + .arg("--skip-git-repo-check") + .arg("--backend") + .arg("lmstudio") + .arg("--model") + .arg("llama") + .arg("--output-schema") + .arg(&schema_path) + .arg("hi") + .assert() + .success(); + + let requests = server + .received_requests() + .await + .expect("failed to capture requests"); + + let mut saw_response_format = false; + for req in &requests { + if req.method == Method::POST && req.url.path() == "/v1/chat/completions" { + let payload: Value = serde_json::from_slice(&req.body) + .context("LM Studio response request should be valid JSON")?; + let expected_format = serde_json::json!({ + "type": "json_schema", + "json_schema": { + "name": "codex_output_schema", + "schema": schema_contents, + "strict": true, + } + }); + assert_eq!( + payload.get("response_format"), + Some(&expected_format), + "LM Studio request missing response_format payload" + ); + saw_response_format = true; + } + } + + assert!( + saw_response_format, + "did not observe LM Studio chat request" + ); + server.verify().await; + Ok(()) +} 
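The tests above stub LM Studio's OpenAI-compatible `/v1/models` listing. A rough sketch of the availability probe performed against that endpoint follows; the helper name and the standalone `reqwest` usage are illustrative assumptions, and the real `codex-lmstudio` implementation additionally handles connection errors and a `models` array variant of the payload.

```rust
use serde_json::Value;

/// Sketch only: ask the OpenAI-compatible endpoint which models are loaded and
/// look for a specific identifier. Mirrors the `{"data": [{"id": ...}]}` shape
/// stubbed in the tests above; assumes `reqwest` with its `json` feature.
async fn model_is_loaded(base_url: &str, model: &str) -> reqwest::Result<bool> {
    let payload: Value = reqwest::Client::new()
        .get(format!("{base_url}/models"))
        .send()
        .await?
        .json()
        .await?;
    let listed = payload["data"]
        .as_array()
        .map(|entries| entries.iter().any(|entry| entry["id"] == model))
        .unwrap_or(false);
    Ok(listed)
}
```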
diff --git a/codex-rs/lmstudio/src/lib.rs b/codex-rs/lmstudio/src/lib.rs index 4073df817f9..f6f5c1ca9ed 100644 --- a/codex-rs/lmstudio/src/lib.rs +++ b/codex-rs/lmstudio/src/lib.rs @@ -6,7 +6,7 @@ use std::io; use std::time::Duration; /// Default LM Studio model used when `--backend lmstudio` is specified without `--model`. -pub const DEFAULT_LM_STUDIO_MODEL: &str = "meta-llama/Meta-Llama-3.1-8B-Instruct"; +pub const DEFAULT_LM_STUDIO_MODEL: &str = "mistralai/devstral-small-2507"; const LM_STUDIO_CONNECTION_ERROR: &str = "No running LM Studio server detected. Launch LM Studio and enable the local inference server (Preferences → Developer → Enable local server)."; @@ -21,17 +21,43 @@ const MODEL_ALIAS_TABLE: &[(&str, &str)] = &[ ("llama-31", DEFAULT_LM_STUDIO_MODEL), ("llama-3.1", DEFAULT_LM_STUDIO_MODEL), ("llama3-8b", DEFAULT_LM_STUDIO_MODEL), - ("qwen2", "Qwen/Qwen2-7B-Instruct"), - ("qwen-2", "Qwen/Qwen2-7B-Instruct"), - ("qwen2-7b", "Qwen/Qwen2-7B-Instruct"), - ("qwen3", "Qwen/Qwen3-7B-Instruct"), - ("qwen-3", "Qwen/Qwen3-7B-Instruct"), - ("qwen3-7b", "Qwen/Qwen3-7B-Instruct"), - ("qwen3-moe", "Qwen/Qwen3-MoE-A2.7B-Instruct"), - ("qwen3_moe", "Qwen/Qwen3-MoE-A2.7B-Instruct"), - ("qwen-3-moe", "Qwen/Qwen3-MoE-A2.7B-Instruct"), + ("devstral", DEFAULT_LM_STUDIO_MODEL), + ("devstral-small", DEFAULT_LM_STUDIO_MODEL), + ("devstral-small-2507", DEFAULT_LM_STUDIO_MODEL), + ("qwen2", "qwen/qwen2.5-coder-14b"), + ("qwen2-5", "qwen/qwen2.5-coder-14b"), + ("qwen2.5", "qwen/qwen2.5-coder-14b"), + ("qwen-2", "qwen/qwen2.5-coder-14b"), + ("qwen2-14b", "qwen/qwen2.5-coder-14b"), + ("qwen3", "qwen/qwen3-4b-2507"), + ("qwen-3", "qwen/qwen3-4b-2507"), + ("qwen3-4b", "qwen/qwen3-4b-2507"), + ("qwen3-moe", "qwen/qwen3-coder-30b"), + ("qwen3moe", "qwen/qwen3-coder-30b"), + ("qwen3_moe", "qwen/qwen3-coder-30b"), + ("qwen-3-moe", "qwen/qwen3-coder-30b"), + ("qwen3-coder", "qwen/qwen3-coder-30b"), + ("qwen3-30b", "qwen/qwen3-coder-30b"), + ("qwen3-moe-a3b", "qwen/qwen3-30b-a3b-2507"), + ("qwen3-moe-a3b-2507", "qwen/qwen3-30b-a3b-2507"), + ("qwen3-30b-a3b", "qwen/qwen3-30b-a3b-2507"), ]; +const MODEL_ALIAS_HINTS: &[(&str, &str)] = &[ + ("llama", DEFAULT_LM_STUDIO_MODEL), + ("qwen2", "qwen/qwen2.5-coder-14b"), + ("qwen3", "qwen/qwen3-4b-2507"), + ("qwen3-moe", "qwen/qwen3-coder-30b"), +]; + +fn alias_examples() -> String { + MODEL_ALIAS_HINTS + .iter() + .map(|(alias, model)| format!("{alias} → {model}")) + .collect::>() + .join(", ") +} + /// Error returned when a provided LM Studio model alias cannot be resolved. #[derive(Debug, Clone)] pub struct UnsupportedModelAliasError { @@ -51,15 +77,17 @@ impl std::fmt::Display for UnsupportedModelAliasError { if self.alias.trim().is_empty() { write!( f, - "LM Studio model name cannot be empty. Supported architectures: {}. You can also pass a full LM Studio model identifier (for example `namespace/model-name`).", - SUPPORTED_ARCHITECTURES.join(", ") + "LM Studio model name cannot be empty. Supported architectures: {}. Try one of the aliases ({}), or pass a full LM Studio model identifier (for example `namespace/model-name`).", + SUPPORTED_ARCHITECTURES.join(", "), + alias_examples() ) } else { write!( f, - "Unsupported LM Studio model alias `{}`. Supported architectures: {}. Provide one of the aliases or the full model identifier as shown in LM Studio.", + "Unsupported LM Studio model alias `{}`. Supported architectures: {}. 
Try one of the aliases ({}), or provide the full model identifier as shown in LM Studio.", self.alias, - SUPPORTED_ARCHITECTURES.join(", ") + SUPPORTED_ARCHITECTURES.join(", "), + alias_examples() ) } } @@ -111,10 +139,7 @@ pub async fn ensure_lmstudio_ready(config: &Config) -> io::Result<()> { .ok_or_else(|| { io::Error::new( io::ErrorKind::NotFound, - format!( - "Built-in provider `{}` not found", - BUILT_IN_LM_STUDIO_MODEL_PROVIDER_ID - ), + format!("Built-in provider `{BUILT_IN_LM_STUDIO_MODEL_PROVIDER_ID}` not found"), ) })?; @@ -191,22 +216,20 @@ fn model_available(payload: &JsonValue, target_model: &str) -> bool { .unwrap_or(false) } - if let Some(entries) = payload.get("data").and_then(|v| v.as_array()) { - if entries + if let Some(entries) = payload.get("data").and_then(|v| v.as_array()) + && entries .iter() .any(|entry| matches_entry(entry, target_model)) - { - return true; - } + { + return true; } - if let Some(entries) = payload.get("models").and_then(|v| v.as_array()) { - if entries + if let Some(entries) = payload.get("models").and_then(|v| v.as_array()) + && entries .iter() .any(|entry| matches_entry(entry, target_model)) - { - return true; - } + { + return true; } false @@ -235,15 +258,19 @@ mod tests { ); assert_eq!( resolve_model_identifier(Some("qwen2")).unwrap(), - "Qwen/Qwen2-7B-Instruct" + "qwen/qwen2.5-coder-14b" ); assert_eq!( resolve_model_identifier(Some("qwen3")).unwrap(), - "Qwen/Qwen3-7B-Instruct" + "qwen/qwen3-4b-2507" ); assert_eq!( resolve_model_identifier(Some("qwen3-moe")).unwrap(), - "Qwen/Qwen3-MoE-A2.7B-Instruct" + "qwen/qwen3-coder-30b" + ); + assert_eq!( + resolve_model_identifier(Some("qwen3-moe-a3b")).unwrap(), + "qwen/qwen3-30b-a3b-2507" ); } @@ -270,7 +297,7 @@ mod tests { let response = serde_json::json!({ "data": [ { "id": DEFAULT_LM_STUDIO_MODEL }, - { "id": "Qwen/Qwen3-7B-Instruct" } + { "id": "qwen/qwen3-4b-2507" } ] }); Mock::given(method("GET")) diff --git a/codex-rs/tui/src/cli.rs b/codex-rs/tui/src/cli.rs index 067061be5d7..b72e91c5cc7 100644 --- a/codex-rs/tui/src/cli.rs +++ b/codex-rs/tui/src/cli.rs @@ -28,7 +28,8 @@ pub struct Cli { #[clap(skip)] pub resume_session_id: Option, - /// Model the agent should use. + /// Model the agent should use. When using LM Studio, pass architecture aliases like `llama`, + /// `qwen2`, `qwen3`, or `qwen3-moe`. #[arg(long, short = 'm')] pub model: Option, diff --git a/docs/getting-started.md b/docs/getting-started.md index 8db4fc6b080..95132696a59 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -54,12 +54,12 @@ To run Codex entirely against a local LM Studio instance: 3. Select the backend and architecture when launching Codex: ```shell - codex --backend lmstudio # defaults to LLaMA 3.1 8B Instruct + codex --backend lmstudio # defaults to DevStral Small (LLaMA architecture) codex --backend lmstudio --model qwen2 # pick a specific architecture codex exec --backend lmstudio --model qwen3-moe "generate unit tests" ``` -Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`) or you can pass the exact identifier shown in LM Studio. If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. +Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`, `qwen3-moe-a3b`) or you can pass the exact identifier shown in LM Studio. 
If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. Structured output (`--output-schema`) is supported: Codex forwards your schema to LM Studio via the OpenAI-compatible `response_format` field. ### Example prompts From 79c010fab5ccaaf3c29990664aa18af4b99ea1d7 Mon Sep 17 00:00:00 2001 From: Hackall <36754621+hackall360@users.noreply.github.com> Date: Sat, 27 Sep 2025 06:43:45 -0700 Subject: [PATCH 2/3] Ensure LM Studio tool calls propagate outputs --- README.md | 2 +- codex-rs/core/src/chat_completions.rs | 62 ++++++++++++++++++++++++++- docs/getting-started.md | 2 +- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2b5a9cf155a..e5d33394d51 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Codex understands the following architecture aliases when `--backend lmstudio` i You can also pass the exact LM Studio identifier (for example `my-org/custom-model`) if you are running a different checkpoint. Codex verifies that the requested model is available from LM Studio and surfaces clear errors when it is not. -Structured output (`--output-schema`) works with LM Studio; Codex automatically forwards your schema using the OpenAI-compatible `response_format` field so local models can emit strict JSON. +Structured output is optional with LM Studio: Codex automatically forwards a schema when you provide `--output-schema`, but models can access command/tool output even without one. --- diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index a8fe1700b59..96977150fc8 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -14,6 +14,7 @@ use tokio::sync::mpsc; use tokio::time::timeout; use tracing::debug; use tracing::trace; +use uuid::Uuid; use crate::ModelProviderInfo; use crate::client_common::Prompt; @@ -382,6 +383,14 @@ async fn process_chat_sse( active: bool, } + impl FunctionCallState { + fn ensure_call_id(&mut self) -> String { + self.call_id + .get_or_insert_with(|| format!("tool_call_{}", Uuid::new_v4())) + .clone() + } + } + let mut fn_call_state = FunctionCallState::default(); let mut assistant_text = String::new(); let mut reasoning_text = String::new(); @@ -547,6 +556,8 @@ async fn process_chat_sse( // Extract call_id if present. if let Some(id) = tool_call.get("id").and_then(|v| v.as_str()) { fn_call_state.call_id.get_or_insert_with(|| id.to_string()); + } else if fn_call_state.call_id.is_none() { + fn_call_state.ensure_call_id(); } // Extract function details if present. 
@@ -585,7 +596,7 @@ async fn process_chat_sse( id: None, name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()), arguments: fn_call_state.arguments.clone(), - call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new), + call_id: fn_call_state.ensure_call_id(), }; let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await; @@ -636,6 +647,55 @@ async fn process_chat_sse( } } +#[cfg(test)] +mod tests { + use super::*; + use bytes::Bytes; + use futures::stream; + use tokio::time::Duration; + + use crate::error::CodexErr; + + #[tokio::test] + async fn generates_tool_call_id_when_missing() { + let chunks = vec![ + Ok::(Bytes::from_static( + b"data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"type\":\"function\",\"function\":{\"name\":\"shell\",\"arguments\":\"{\\\"command\\\":[\\\"echo\\\"],\\\"timeout_ms\\\":1000}\"}}]}}]}\n\n", + )), + Ok::(Bytes::from_static( + b"data: {\"choices\":[{\"finish_reason\":\"tool_calls\"}]}\n\n", + )), + ]; + + let stream = stream::iter(chunks); + let (tx, mut rx) = mpsc::channel(8); + let handle = tokio::spawn(async move { + process_chat_sse(stream, tx, Duration::from_secs(5)).await; + }); + + let mut observed_call_id: Option = None; + while let Some(event) = rx.recv().await { + match event.expect("stream event") { + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id, .. }) => { + observed_call_id = Some(call_id); + } + ResponseEvent::Completed { .. } => break, + _ => {} + } + } + + handle.await.expect("process_chat_sse task"); + + let call_id = observed_call_id.expect("missing tool call"); + assert!(!call_id.is_empty(), "call_id should not be empty"); + assert!( + call_id.starts_with("tool_call_"), + "unexpected fallback call_id prefix: {}", + call_id + ); + } +} + /// Optional client-side aggregation helper /// /// Stream adapter that merges the incremental `OutputItemDone` chunks coming from diff --git a/docs/getting-started.md b/docs/getting-started.md index 95132696a59..5317dd8fa7b 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -59,7 +59,7 @@ To run Codex entirely against a local LM Studio instance: codex exec --backend lmstudio --model qwen3-moe "generate unit tests" ``` -Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`, `qwen3-moe-a3b`) or you can pass the exact identifier shown in LM Studio. If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. Structured output (`--output-schema`) is supported: Codex forwards your schema to LM Studio via the OpenAI-compatible `response_format` field. +Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`, `qwen3-moe-a3b`) or you can pass the exact identifier shown in LM Studio. If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. Structured output is optional: when you provide `--output-schema`, Codex forwards the schema to LM Studio via the OpenAI-compatible `response_format` field. 
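Stepping back to the `chat_completions.rs` change in this patch: the fallback added in `process_chat_sse` ensures tool calls and their outputs stay linked. A condensed sketch, written as a free function for illustration (the patch implements it as `FunctionCallState::ensure_call_id`):

```rust
use uuid::Uuid;

/// Sketch: reuse the provider-supplied call id when present, otherwise mint a
/// synthetic `tool_call_<uuid>` id so FunctionCall/FunctionCallOutput pairs
/// stay correlated even when the provider omits an id.
fn fallback_call_id(existing: Option<String>) -> String {
    existing.unwrap_or_else(|| format!("tool_call_{}", Uuid::new_v4()))
}
```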
### Example prompts From c55d51b9f84d384479519930363ff64835f81ba3 Mon Sep 17 00:00:00 2001 From: Hackall <36754621+hackall360@users.noreply.github.com> Date: Sat, 27 Sep 2025 07:21:14 -0700 Subject: [PATCH 3/3] Enable default structured output for LM Studio --- README.md | 2 +- codex-rs/exec/src/cli.rs | 4 - codex-rs/exec/src/lib.rs | 37 +--- codex-rs/exec/src/output_schema.rs | 69 +++++++ codex-rs/exec/tests/suite/lmstudio.rs | 94 --------- codex-rs/exec/tests/suite/mod.rs | 2 +- codex-rs/exec/tests/suite/output_schema.rs | 70 ------- .../exec/tests/suite/structured_output.rs | 194 ++++++++++++++++++ docs/getting-started.md | 2 +- 9 files changed, 273 insertions(+), 201 deletions(-) create mode 100644 codex-rs/exec/src/output_schema.rs delete mode 100644 codex-rs/exec/tests/suite/output_schema.rs create mode 100644 codex-rs/exec/tests/suite/structured_output.rs diff --git a/README.md b/README.md index e5d33394d51..91de07144af 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Codex understands the following architecture aliases when `--backend lmstudio` i You can also pass the exact LM Studio identifier (for example `my-org/custom-model`) if you are running a different checkpoint. Codex verifies that the requested model is available from LM Studio and surfaces clear errors when it is not. -Structured output is optional with LM Studio: Codex automatically forwards a schema when you provide `--output-schema`, but models can access command/tool output even without one. +When you select the LM Studio backend Codex automatically enables structured JSON output so the agent can reliably capture command results. No extra flags are required. --- diff --git a/codex-rs/exec/src/cli.rs b/codex-rs/exec/src/cli.rs index bd3868c960b..538a6a000b0 100644 --- a/codex-rs/exec/src/cli.rs +++ b/codex-rs/exec/src/cli.rs @@ -58,10 +58,6 @@ pub struct Cli { #[arg(long = "skip-git-repo-check", default_value_t = false)] pub skip_git_repo_check: bool, - /// Path to a JSON Schema file describing the model's final response shape. 
- #[arg(long = "output-schema", value_name = "FILE")] - pub output_schema: Option, - #[clap(skip)] pub config_overrides: CliConfigOverrides, diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 2299cb0f66a..033020b7e69 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -4,6 +4,7 @@ mod event_processor_with_human_output; pub mod event_processor_with_json_output; pub mod exec_events; pub mod experimental_event_processor_with_json_output; +mod output_schema; use std::io::IsTerminal; use std::io::Read; @@ -28,7 +29,6 @@ use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; use event_processor_with_human_output::EventProcessorWithHumanOutput; use experimental_event_processor_with_json_output::ExperimentalEventProcessorWithJsonOutput; -use serde_json::Value; use tracing::debug; use tracing::error; use tracing::info; @@ -58,7 +58,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any experimental_json, sandbox_mode: sandbox_mode_cli_arg, prompt, - output_schema: output_schema_path, include_plan_tool, config_overrides, } = cli; @@ -105,8 +104,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any } }; - let output_schema = load_output_schema(output_schema_path); - let (stdout_with_ansi, stderr_with_ansi) = match color { cli::Color::Always => (true, true), cli::Color::Never => (false, false), @@ -147,6 +144,12 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any let using_oss = backend_choice.is_oss(); let using_lmstudio = backend_choice.is_lmstudio(); + let output_schema = if using_lmstudio { + Some(output_schema::default_lmstudio_schema()) + } else { + None + }; + let mut model = if let Some(model) = model_cli_arg { Some(model) } else if using_oss { @@ -398,29 +401,3 @@ async fn resolve_resume_path( Ok(None) } } - -fn load_output_schema(path: Option) -> Option { - let path = path?; - - let schema_str = match std::fs::read_to_string(&path) { - Ok(contents) => contents, - Err(err) => { - eprintln!( - "Failed to read output schema file {}: {err}", - path.display() - ); - std::process::exit(1); - } - }; - - match serde_json::from_str::(&schema_str) { - Ok(value) => Some(value), - Err(err) => { - eprintln!( - "Output schema file {} is not valid JSON: {err}", - path.display() - ); - std::process::exit(1); - } - } -} diff --git a/codex-rs/exec/src/output_schema.rs b/codex-rs/exec/src/output_schema.rs new file mode 100644 index 00000000000..544ec8c1eb1 --- /dev/null +++ b/codex-rs/exec/src/output_schema.rs @@ -0,0 +1,69 @@ +use serde_json::Value; +use serde_json::json; + +pub(crate) fn default_lmstudio_schema() -> Value { + json!({ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Codex CLI Final Response", + "description": "Structured JSON response emitted by Codex CLI sessions.", + "type": "object", + "properties": { + "status": { + "description": "Overall completion state.", + "type": "string", + "enum": ["success", "partial", "blocked", "error"] + }, + "summary": { + "description": "Key bullet points summarizing the work performed.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "testing": { + "description": "Tests or checks that were executed.", + "type": "array", + "items": { + "type": "object", + "properties": { + "command": { + "description": "Exact command that was run.", + "type": "string", + "minLength": 1 + }, + "status": { + "description": "Outcome of the command.", + "type": "string", + 
"enum": ["pass", "fail", "not_run", "blocked"] + }, + "details": { + "description": "Additional context about the run.", + "type": "string" + } + }, + "required": ["command", "status"], + "additionalProperties": false + } + }, + "next_steps": { + "description": "Follow-up work that should be considered.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "notes": { + "description": "Extra caveats or reminders for the user.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["summary"], + "additionalProperties": false + }) +} diff --git a/codex-rs/exec/tests/suite/lmstudio.rs b/codex-rs/exec/tests/suite/lmstudio.rs index 92c5e611146..960935a379a 100644 --- a/codex-rs/exec/tests/suite/lmstudio.rs +++ b/codex-rs/exec/tests/suite/lmstudio.rs @@ -109,97 +109,3 @@ async fn exec_resolves_lmstudio_model_aliases() -> anyhow::Result<()> { Ok(()) } - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn exec_includes_output_schema_for_lmstudio_requests() -> anyhow::Result<()> { - let test = test_codex_exec(); - - let schema_contents = serde_json::json!({ - "type": "object", - "properties": { - "answer": { "type": "string" } - }, - "required": ["answer"], - "additionalProperties": false - }); - let schema_path = test.cwd_path().join("lmstudio-schema.json"); - std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?; - - let server = responses::start_mock_server().await; - let models_payload = serde_json::json!({ - "data": [ - { "id": DEFAULT_LM_STUDIO_MODEL } - ] - }); - - Mock::given(method("GET")) - .and(path("/v1/models")) - .respond_with(ResponseTemplate::new(200).set_body_json(models_payload)) - .expect(1) - .mount(&server) - .await; - - let chat_stream = concat!( - "data: {\"choices\":[{\"delta\":{\"content\":\"ok\"}}]}\n\n", - "data: {\"choices\":[{\"delta\":{}}]}\n\n", - "data: [DONE]\n\n", - ); - - Mock::given(method("POST")) - .and(path("/v1/chat/completions")) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw(chat_stream, "text/event-stream"), - ) - .expect(1) - .mount(&server) - .await; - - test.cmd() - .env("CODEX_LM_STUDIO_BASE_URL", format!("{}/v1", server.uri())) - .arg("--skip-git-repo-check") - .arg("--backend") - .arg("lmstudio") - .arg("--model") - .arg("llama") - .arg("--output-schema") - .arg(&schema_path) - .arg("hi") - .assert() - .success(); - - let requests = server - .received_requests() - .await - .expect("failed to capture requests"); - - let mut saw_response_format = false; - for req in &requests { - if req.method == Method::POST && req.url.path() == "/v1/chat/completions" { - let payload: Value = serde_json::from_slice(&req.body) - .context("LM Studio response request should be valid JSON")?; - let expected_format = serde_json::json!({ - "type": "json_schema", - "json_schema": { - "name": "codex_output_schema", - "schema": schema_contents, - "strict": true, - } - }); - assert_eq!( - payload.get("response_format"), - Some(&expected_format), - "LM Studio request missing response_format payload" - ); - saw_response_format = true; - } - } - - assert!( - saw_response_format, - "did not observe LM Studio chat request" - ); - server.verify().await; - Ok(()) -} diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs index 79470025756..fb077425554 100644 --- a/codex-rs/exec/tests/suite/mod.rs +++ b/codex-rs/exec/tests/suite/mod.rs @@ -1,7 +1,7 @@ // Aggregates all former standalone integration 
tests as modules. mod apply_patch; mod lmstudio; -mod output_schema; mod resume; mod sandbox; mod server_error_exit; +mod structured_output; diff --git a/codex-rs/exec/tests/suite/output_schema.rs b/codex-rs/exec/tests/suite/output_schema.rs deleted file mode 100644 index 03a04d0588e..00000000000 --- a/codex-rs/exec/tests/suite/output_schema.rs +++ /dev/null @@ -1,70 +0,0 @@ -#![cfg(not(target_os = "windows"))] -#![allow(clippy::expect_used, clippy::unwrap_used)] - -use core_test_support::responses; -use core_test_support::test_codex_exec::test_codex_exec; -use serde_json::Value; -use wiremock::matchers::any; - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> { - let test = test_codex_exec(); - - let schema_contents = serde_json::json!({ - "type": "object", - "properties": { - "answer": { "type": "string" } - }, - "required": ["answer"], - "additionalProperties": false - }); - let schema_path = test.cwd_path().join("schema.json"); - std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?; - let expected_schema: Value = schema_contents; - - let server = responses::start_mock_server().await; - let body = responses::sse(vec![ - serde_json::json!({ - "type": "response.created", - "response": {"id": "resp1"} - }), - responses::ev_assistant_message("m1", "fixture hello"), - responses::ev_completed("resp1"), - ]); - responses::mount_sse_once(&server, any(), body).await; - - test.cmd_with_server(&server) - .arg("--skip-git-repo-check") - // keep using -C in the test to exercise the flag as well - .arg("-C") - .arg(test.cwd_path()) - .arg("--output-schema") - .arg(&schema_path) - .arg("-m") - .arg("gpt-5") - .arg("tell me a joke") - .assert() - .success(); - - let requests = server - .received_requests() - .await - .expect("failed to capture requests"); - assert_eq!(requests.len(), 1, "expected exactly one request"); - let payload: Value = serde_json::from_slice(&requests[0].body)?; - let text = payload.get("text").expect("request missing text field"); - let format = text - .get("format") - .expect("request missing text.format field"); - assert_eq!( - format, - &serde_json::json!({ - "name": "codex_output_schema", - "type": "json_schema", - "strict": true, - "schema": expected_schema, - }) - ); - - Ok(()) -} diff --git a/codex-rs/exec/tests/suite/structured_output.rs b/codex-rs/exec/tests/suite/structured_output.rs new file mode 100644 index 00000000000..a6eaecb7515 --- /dev/null +++ b/codex-rs/exec/tests/suite/structured_output.rs @@ -0,0 +1,194 @@ +#![cfg(not(target_os = "windows"))] +#![allow(clippy::expect_used, clippy::unwrap_used)] + +use core_test_support::responses; +use core_test_support::test_codex_exec::test_codex_exec; +use serde_json::Value; +use serde_json::json; +use wiremock::Mock; +use wiremock::ResponseTemplate; +use wiremock::http::Method; +use wiremock::matchers::any; +use wiremock::matchers::method; +use wiremock::matchers::path; + +use codex_lmstudio::DEFAULT_LM_STUDIO_MODEL; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn exec_sets_default_schema_for_lmstudio() -> anyhow::Result<()> { + let test = test_codex_exec(); + + let server = responses::start_mock_server().await; + let models_payload = json!({ + "data": [ + { "id": DEFAULT_LM_STUDIO_MODEL } + ] + }); + Mock::given(method("GET")) + .and(path("/v1/models")) + .respond_with(ResponseTemplate::new(200).set_body_json(models_payload)) + .expect(1) + .mount(&server) + .await; + let body = 
responses::sse(vec![ + serde_json::json!({ + "type": "response.created", + "response": {"id": "resp1"} + }), + responses::ev_assistant_message("m1", "fixture hello"), + responses::ev_completed("resp1"), + ]); + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(body, "text/event-stream"), + ) + .expect(1) + .mount(&server) + .await; + + test.cmd() + .env("CODEX_LM_STUDIO_BASE_URL", format!("{}/v1", server.uri())) + .arg("--skip-git-repo-check") + .arg("--backend") + .arg("lmstudio") + .arg("-m") + .arg("llama") + .arg("tell me a joke") + .assert() + .success(); + + let requests = server + .received_requests() + .await + .expect("failed to capture requests"); + let chat_request = requests + .iter() + .find(|req| req.method == Method::POST && req.url.path() == "/v1/chat/completions") + .expect("expected LM Studio chat request"); + let payload: Value = serde_json::from_slice(&chat_request.body)?; + let format = payload + .get("response_format") + .expect("request missing response_format field"); + + let expected_schema = json!({ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Codex CLI Final Response", + "description": "Structured JSON response emitted by Codex CLI sessions.", + "type": "object", + "properties": { + "status": { + "description": "Overall completion state.", + "type": "string", + "enum": ["success", "partial", "blocked", "error"] + }, + "summary": { + "description": "Key bullet points summarizing the work performed.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "testing": { + "description": "Tests or checks that were executed.", + "type": "array", + "items": { + "type": "object", + "properties": { + "command": { + "description": "Exact command that was run.", + "type": "string", + "minLength": 1 + }, + "status": { + "description": "Outcome of the command.", + "type": "string", + "enum": ["pass", "fail", "not_run", "blocked"] + }, + "details": { + "description": "Additional context about the run.", + "type": "string" + } + }, + "required": ["command", "status"], + "additionalProperties": false + } + }, + "next_steps": { + "description": "Follow-up work that should be considered.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "notes": { + "description": "Extra caveats or reminders for the user.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["summary"], + "additionalProperties": false + }); + + assert_eq!( + format, + &json!({ + "type": "json_schema", + "json_schema": { + "name": "codex_output_schema", + "schema": expected_schema, + "strict": true, + } + }) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn exec_does_not_set_schema_for_openai() -> anyhow::Result<()> { + let test = test_codex_exec(); + + let server = responses::start_mock_server().await; + let body = responses::sse(vec![ + serde_json::json!({ + "type": "response.created", + "response": {"id": "resp1"} + }), + responses::ev_assistant_message("m1", "fixture hello"), + responses::ev_completed("resp1"), + ]); + responses::mount_sse_once(&server, any(), body).await; + + test.cmd_with_server(&server) + .arg("--skip-git-repo-check") + .arg("-m") + .arg("gpt-5") + .arg("tell me a joke") + .assert() + .success(); + + let requests = server + .received_requests() + .await + .expect("failed to capture requests"); + 
assert_eq!(requests.len(), 1, "expected exactly one request"); + let payload: Value = serde_json::from_slice(&requests[0].body)?; + assert!( + payload + .get("text") + .and_then(|text| text.get("format")) + .is_none(), + "OpenAI request should not include structured output schema by default" + ); + + Ok(()) +} diff --git a/docs/getting-started.md b/docs/getting-started.md index 5317dd8fa7b..67919d4b6f2 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -59,7 +59,7 @@ To run Codex entirely against a local LM Studio instance: codex exec --backend lmstudio --model qwen3-moe "generate unit tests" ``` -Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`, `qwen3-moe-a3b`) or you can pass the exact identifier shown in LM Studio. If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. Structured output is optional: when you provide `--output-schema`, Codex forwards the schema to LM Studio via the OpenAI-compatible `response_format` field. +Codex accepts friendly aliases for the most common LM Studio builds (`llama`, `qwen2`, `qwen3`, `qwen3-moe`, `qwen3-moe-a3b`) or you can pass the exact identifier shown in LM Studio. If the requested model is not available, Codex reports a clear error so you can download or start it inside LM Studio. Structured JSON responses are enabled automatically for LM Studio so models reliably see command output; no additional flags are required. ### Example prompts
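As a closing illustration of the default schema introduced in the third patch, a final response that satisfies it might look like the following sketch; the field values are invented for the example, and only `summary` is required.

```rust
use serde_json::json;

fn main() {
    // Invented values for illustration; the shape matches default_lmstudio_schema().
    let response = json!({
        "status": "success",
        "summary": ["Wired up the LM Studio backend", "Updated docs and tests"],
        "testing": [
            { "command": "cargo test -p codex-exec", "status": "pass" }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&response).unwrap());
}
```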