diff --git a/.vscode/launch.json b/.vscode/launch.json
index ae53199..4ef08b6 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,6 +1,7 @@
 {
     "version": "0.2.0",
     "configurations": [
+        
         {
             "name": "Debug AOAI Simulated API",
             "type": "debugpy",
@@ -27,7 +28,17 @@
             "type": "debugpy",
             "request": "launch",
             "program": "${file}",
-            "console": "integratedTerminal"
-        }
+            "console": "integratedTerminal",
+            "justMyCode": false
+        },
+        {
+            "name": "Python: Debug Tests",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "purpose": ["debug-test"],
+            "console": "integratedTerminal",
+            "justMyCode": false
+          }
     ]
 }
\ No newline at end of file
diff --git a/examples/openai_deployment_config.json b/examples/openai_deployment_config.json
index 1694519..8f2f985 100644
--- a/examples/openai_deployment_config.json
+++ b/examples/openai_deployment_config.json
@@ -39,6 +39,14 @@
         "model": "gpt-3.5-turbo",
         "tokensPerMinute" : 100000000
     },
+    "gpt-4-10k-token" : {
+        "model": "gpt-4",
+        "tokensPerMinute" : 10000
+    },
+    "gpt-4o-10k-token" : {
+        "model": "gpt-4o",
+        "tokensPerMinute" : 10000
+    },
     "embedding" : {
         "model": "text-embedding-ada-002",
         "tokensPerMinute" : 10000
diff --git a/http/chat-completions.http b/http/chat-completions.http
index f423639..cfd308f 100644
--- a/http/chat-completions.http
+++ b/http/chat-completions.http
@@ -15,9 +15,9 @@ api-key: {{aoai_key}}
 
 {
   "messages": [{
-    "role": "user",
-    "content": "Is this thing on?"
-    }],
+	"role": "user",
+	"content": "Is this thing on?"
+	}],
   "model": "gpt-5-turbo-1",
   "max_tokens": 20
 }
@@ -29,13 +29,75 @@ api-key: {{aoai_key}}
 
 {
   "messages": [{
-    "role": "user",
-    "content": "What is the meaning of life?"
-    }],
+	"role": "user",
+	"content": "What is the meaning of life?"
+	}],
   "model": "gpt-5-turbo-1",
   "max_tokens": 2000
 }
 
+###
+POST {{aoai_endpoint}}/openai/deployments/{{aoai_deployment}}/chat/completions?api-version=2024-10-21
+Content-Type: application/json
+api-key: {{aoai_key}}
+
+{
+  "messages": [{
+	"role": "user",
+	"content": "What is the meaning of life?"
+	}],
+  "model": "gpt-5-turbo-1",
+  "response_format": {
+	"type": "json_schema",
+	"json_schema": {
+					"name": "CalendarEventResponse",
+					"strict": true,
+					"schema": {
+						"type": "object",
+						"properties": {
+							"name": {
+							  "type": "string"
+							},
+							"date": {
+								"type": "string"
+							},
+							"participants": {
+								"type": "array",
+								"items": {
+									"type": "string"
+								}
+							}
+						},
+						"required": [
+							"name",
+							"date",
+							"participants"
+						],
+						"additionalProperties": false
+					}
+				}
+  },
+  "max_tokens": 2000
+}
+###
+POST {{aoai_endpoint}}/openai/deployments/{{aoai_deployment}}/chat/completions?api-version=2024-10-21
+Content-Type: application/json
+api-key: {{aoai_key}}
+
+{
+  "messages": [{
+	"role": "user",
+	"content": "What is the meaning of life?"
+	}],
+  "model": "gpt-5-turbo-1",
+  "response_format": {
+	"type": "json_schema",
+	"json_schema": {
+	}
+  },
+  "max_tokens": 2000
+}
+
 
 ###
 POST {{aoai_endpoint}}/openai/deployments/{{aoai_deployment}}/chat/completions?api-version=2023-12-01-preview
@@ -125,15 +187,15 @@ api-key: {{aoai_key}}
 
 {
   "messages": [{
-    "role": "user",
-    "content": "What is the meaning of life?"
-    }, {
-      "role": "assistant",
-      "content": "blank"
-    }, {
-      "role": "user",
-      "content": "What is the meaning of life?"
-    }],
+	"role": "user",
+	"content": "What is the meaning of life?"
+	}, {
+	  "role": "assistant",
+	  "content": "blank"
+	}, {
+	  "role": "user",
+	  "content": "What is the meaning of life?"
+	}],
   "model": "gpt-5-turbo-1"
 }
 
@@ -145,13 +207,13 @@ api-key: {{aoai_key}}
 
 {
   "messages": [
-    {"role": "user", "content": "What is the furthest human-made object from Earth?"},
-    {"role": "assistant", "content": "As an AI language model, I don't have the latest information on this. However, as of August 2021, the furthest human-made object from Earth is the Voyager 1 spacecraft. It was launched in 1977 and is currently over 14 billion miles away from Earth and is still transmitting data."},
+	{"role": "user", "content": "What is the furthest human-made object from Earth?"},
+	{"role": "assistant", "content": "As an AI language model, I don't have the latest information on this. However, as of August 2021, the furthest human-made object from Earth is the Voyager 1 spacecraft. It was launched in 1977 and is currently over 14 billion miles away from Earth and is still transmitting data."},
 
-    {"role": "user", "content": "How fast is it travelling?"},
-    {"role": "assistant", "content": "As of August 2021, Voyager 1 is traveling at a speed of about 17.7 km/s (62,136 km/h or 38,614 miles per hour). This is about 0.006% of the speed of light, which means it would take over 17,000 years to travel one light-year."},
+	{"role": "user", "content": "How fast is it travelling?"},
+	{"role": "assistant", "content": "As of August 2021, Voyager 1 is traveling at a speed of about 17.7 km/s (62,136 km/h or 38,614 miles per hour). This is about 0.006% of the speed of light, which means it would take over 17,000 years to travel one light-year."},
 
-    {"role": "user", "content": "How much does it weigh?"}
+	{"role": "user", "content": "How much does it weigh?"}
   ],
   "model": "gpt-5-turbo-1"
 }
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/generator/model_catalogue.py b/src/aoai-api-simulator/src/aoai_api_simulator/generator/model_catalogue.py
index ad5de51..9d35433 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/generator/model_catalogue.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/generator/model_catalogue.py
@@ -1,8 +1,12 @@
 from aoai_api_simulator.models import OpenAIChatModel, OpenAIEmbeddingModel, OpenAIWhisperModel
 
+# models: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
+
 model_catalogue = {
-    "gpt-3.5-turbo": OpenAIChatModel(name="gpt-3.5-turbo"),
-    "gpt-3.5-turbo-0613": OpenAIChatModel(name="gpt-3.5-turbo-0613"),
+    "gpt-3.5-turbo": OpenAIChatModel(name="gpt-3.5-turbo", supports_json_schema=False),
+    "gpt-3.5-turbo-0613": OpenAIChatModel(name="gpt-3.5-turbo-0613", supports_json_schema=False),
+    "gpt-4": OpenAIChatModel(name="gpt-4", supports_json_schema=True),
+    "gpt-4o": OpenAIChatModel(name="gpt-4o", supports_json_schema=True),
     "text-embedding-ada-001": OpenAIEmbeddingModel(name="text-embedding-ada-001", supports_custom_dimensions=False),
     "text-embedding-ada-002": OpenAIEmbeddingModel(name="text-embedding-ada-002", supports_custom_dimensions=False),
     "text-embedding-3-small": OpenAIEmbeddingModel(name="text-embedding-3-small", supports_custom_dimensions=True),
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py b/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py
index 2b1dce0..2454d37 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai.py
@@ -40,6 +40,7 @@
 )
 from fastapi import Response
 from fastapi.responses import StreamingResponse
+from jsf import JSF
 
 # This file contains a default implementation of the openai generators
 # You can configure your own generators by creating a generator_config.py file and setting the
@@ -57,6 +58,16 @@
 )
 
 
+def _deployment_not_found_response(deployment_name: str) -> Response:
+    return Response(
+        status_code=404,
+        content=json.dumps({"error": f"Deployment {deployment_name} not found"}),
+        headers={
+            "Content-Type": "application/json",
+        },
+    )
+
+
 def get_embedding_deployment_from_name(context: RequestContext, deployment_name: str) -> OpenAIDeployment | None:
     """
     Gets the embedding model for the specified embedding deployment.
@@ -531,13 +542,7 @@ async def azure_openai_embedding(context: RequestContext) -> Response | None:
     deployment = get_embedding_deployment_from_name(context, deployment_name)
 
     if deployment is None:
-        return Response(
-            status_code=404,
-            content=json.dumps({"error": f"Deployment {deployment_name} not found"}),
-            headers={
-                "Content-Type": "application/json",
-            },
-        )
+        return _deployment_not_found_response(deployment_name)
 
     if not isinstance(deployment.model, OpenAIEmbeddingModel):
         return Response(
@@ -588,13 +593,7 @@ async def azure_openai_completion(context: RequestContext) -> Response | None:
     deployment_name = path_params["deployment"]
     model = get_chat_model_from_deployment_name(context, deployment_name)
     if model is None:
-        return Response(
-            status_code=404,
-            content=json.dumps({"error": f"Deployment {deployment_name} not found"}),
-            headers={
-                "Content-Type": "application/json",
-            },
-        )
+        return _deployment_not_found_response(deployment_name)
 
     if not isinstance(model, OpenAIChatModel):
         return Response(
@@ -652,13 +651,8 @@ async def azure_openai_chat_completion(context: RequestContext) -> Response | No
     deployment_name = path_params["deployment"]
     model = get_chat_model_from_deployment_name(context, deployment_name)
     if model is None:
-        return Response(
-            status_code=404,
-            content=json.dumps({"error": f"Deployment {deployment_name} not found"}),
-            headers={
-                "Content-Type": "application/json",
-            },
-        )
+        return _deployment_not_found_response(deployment_name)
+
     if not isinstance(model, OpenAIChatModel):
         return Response(
             status_code=400,
@@ -688,20 +682,145 @@ async def azure_openai_chat_completion(context: RequestContext) -> Response | No
     context.values[SIMULATOR_KEY_OPENAI_MAX_TOKENS_EFFECTIVE] = max_tokens
 
     streaming = request_body.get("stream", False)
+    response_format = request_body.get("response_format", None)
+
+    # pylint: disable-next=line-too-long
+    # versions https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=python-secure%2Cglobal-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models
+    response_format_type = response_format.get("type") if response_format else "text"
+    if response_format_type == "text":
+        response = create_lorem_chat_completion_response(
+            context=context,
+            deployment_name=deployment_name,
+            model_name=model.name,
+            streaming=streaming,
+            max_tokens=max_tokens,
+            prompt_messages=messages,
+        )
+    elif response_format_type == "json_schema":
+        response = create_json_schema_chat_completion_response(
+            context=context,
+            model=model,
+            deployment_name=deployment_name,
+            streaming=streaming,
+            messages=messages,
+            response_format=response_format,
+        )
+    else:
+        return Response(
+            status_code=400,
+            content=json.dumps(
+                {
+                    "error": {
+                        "message": f"Unsupported response_format type: {response_format_type}. "
+                        + "Currently supported types are 'text' and 'json_schema'.",
+                        "type": "invalid_request_error",
+                        "param": None,
+                        "code": None,
+                    }
+                }
+            ),
+            headers={
+                "Content-Type": "application/json",
+            },
+        )
+
+    # calculate a simulated latency and store in context.values
+    # needs to be called after the response has been created
+    await calculate_latency_text_endpoints(context, 200)
+
+    return response
+
+
+# pylint: disable-next=too-many-arguments, too-many-positional-arguments
+def create_json_schema_chat_completion_response(
+    context: RequestContext,
+    model: OpenAIChatModel,
+    deployment_name: str,
+    streaming: bool,
+    messages: list,
+    response_format: dict,
+):
+    """Return a chat completion with JSF-generated JSON content, or a 400 Response if the request is invalid."""
+    # json_schema is enabled only for api versions 2024-08-01-preview and later; that limit is not enforced here
 
-    response = create_lorem_chat_completion_response(
+    # https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/structured-outputs?tabs=rest
+    if not model.supports_json_schema:
+        return Response(
+            status_code=400,
+            content=json.dumps(
+                {
+                    "error": {
+                        "message": "Invalid parameter: 'response_format' of type 'json_schema' is not supported "
+                        + "with this model. Learn more about supported models at the Structured Outputs "
+                        + "guide: https://platform.openai.com/docs/guides/structured-outputs",
+                        "type": "invalid_request_error",
+                        "param": None,
+                        "code": None,
+                    }
+                }
+            ),
+            headers={
+                "Content-Type": "application/json",
+            },
+        )
+
+    error_message = None
+    schema_info = response_format.get("json_schema")
+    if schema_info is None:
+        error_message = "Missing required parameter: 'response_format.json_schema'."
+    else:
+        schema = schema_info.get("schema")
+        if schema is None:
+            error_message = "Missing required parameter: 'response_format.json_schema.schema'."
+    if error_message:
+        return Response(
+            status_code=400,
+            content=json.dumps(
+                {
+                    "error": {
+                        "message": error_message,
+                        "type": "invalid_request_error",
+                        "param": None,
+                        "code": None,
+                    }
+                }
+            ),
+            headers={
+                "Content-Type": "application/json",
+            },
+        )
+
+    # Generate a JSON response based on the schema
+    # Check for $provider in schema (https://github.com/ghandic/jsf/issues/1)
+    if "$provider" in json.dumps(schema):
+        return Response(
+            status_code=400,
+            content=json.dumps(
+                {
+                    "error": {
+                        "message": "$provider is not allowed in JSON Schema.",
+                        "type": "invalid_request_error",
+                        "param": None,
+                        "code": None,
+                    }
+                }
+            ),
+            headers={"Content-Type": "application/json"},
+        )
+
+    # TODO: add caching of JSF objects
+    json_faker = JSF(schema)
+    json_response = json_faker.generate()
+
+    response = create_chat_completion_response(
         context=context,
         deployment_name=deployment_name,
         model_name=model.name,
         streaming=streaming,
-        max_tokens=max_tokens,
         prompt_messages=messages,
+        generated_content=json.dumps(json_response),
+        finish_reason="stop",
     )
-
-    # calculate a simulated latency and store in context.values
-    # needs to be called after the response has been created
-    await calculate_latency_text_endpoints(context, 200)
-
     return response
 
 
@@ -718,13 +837,8 @@ async def azure_openai_translation(context: RequestContext) -> Response | None:
     deployment_name = path_params["deployment"]
     model = get_whisper_model_from_deployment_name(context, deployment_name)
     if model is None:
-        return Response(
-            status_code=404,
-            content=json.dumps({"error": f"Deployment {deployment_name} not found"}),
-            headers={
-                "Content-Type": "application/json",
-            },
-        )
+        return _deployment_not_found_response(deployment_name)
+
     request_form = await request.form()
     audio_file = request_form["file"]
     response_format = request_form["response_format"]
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai_tokens.py b/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai_tokens.py
index 44382c0..104aced 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai_tokens.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/generator/openai_tokens.py
@@ -58,32 +58,40 @@ def num_tokens_from_messages(messages, model):
     if model in {
         "gpt-3.5-turbo-0613",
         "gpt-3.5-turbo-16k-0613",
+        "gpt-3.5-turbo-0125",
         "gpt-4-0314",
         "gpt-4-32k-0314",
         "gpt-4-0613",
         "gpt-4-32k-0613",
+        "gpt-4o-mini-2024-07-18",
+        "gpt-4o-2024-08-06",
     }:
         tokens_per_message = 3
         tokens_per_name = 1
     elif model == "gpt-3.5-turbo-0301":
         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
         tokens_per_name = -1  # if there's a name, the role is omitted
     elif "gpt-3.5-turbo" in model:
         _warn_once(
-            model, "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
+            model, "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125."
         )
-        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
+        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0125")
+    elif "gpt-4o-mini" in model:
+        _warn_once(
+            model, "Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18."
+        )
+        return num_tokens_from_messages(messages, model="gpt-4o-mini-2024-07-18")
+    elif "gpt-4o" in model:
+        _warn_once(
+            model,
+            "Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.",
+        )
+        return num_tokens_from_messages(messages, model="gpt-4o-2024-08-06")
     elif "gpt-4" in model:
         _warn_once(model, "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
         return num_tokens_from_messages(messages, model="gpt-4-0613")
-    elif "whisper" in model:
-        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
     else:
-        raise NotImplementedError(
-            f"num_tokens_from_messages() is not implemented for model {model}. "
-            + "See https://github.com/openai/openai-python/blob/main/chatml.md for information "
-            + " on how messages are converted to tokens."
-        )
+        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}.""")
     num_tokens = 0
     for message in messages:
         num_tokens += tokens_per_message
diff --git a/src/aoai-api-simulator/src/aoai_api_simulator/models.py b/src/aoai-api-simulator/src/aoai_api_simulator/models.py
index b1d9f59..cbfe101 100644
--- a/src/aoai-api-simulator/src/aoai_api_simulator/models.py
+++ b/src/aoai-api-simulator/src/aoai_api_simulator/models.py
@@ -187,6 +187,8 @@ def is_token_limited(self) -> bool:
 
 @dataclass
 class OpenAIChatModel(OpenAIModel):
+    supports_json_schema: bool
+
     @property
     def is_token_limited(self) -> bool:
         return True
diff --git a/tests/requirements.txt b/tests/requirements.txt
index c39d9d7..c7c4b6f 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,4 +1,5 @@
-openai==1.16.1
+openai==1.42.0
+pydantic==2.8.2
 pytest==8.1.1
 pytest-asyncio==0.23.6
 pytest-watch==4.2.0
diff --git a/tests/test_openai_generator_chat_completion.py b/tests/test_openai_generator_chat_completion.py
index a9fef99..bc8737f 100644
--- a/tests/test_openai_generator_chat_completion.py
+++ b/tests/test_openai_generator_chat_completion.py
@@ -2,6 +2,9 @@
 Test the OpenAI generator endpoints
 """
 
+import collections.abc
+import json
+
 import pytest
 from aoai_api_simulator.generator.manager import get_default_generators
 from aoai_api_simulator.generator.model_catalogue import model_catalogue
@@ -15,6 +18,7 @@
 )
 from openai import AuthenticationError, AzureOpenAI, BadRequestError, NotFoundError, Stream
 from openai.types.chat import ChatCompletionChunk
+from pydantic import BaseModel
 
 from .test_uvicorn_server import UvicornTestServer
 
@@ -55,6 +59,11 @@ def _get_generator_config(extension_path: str | None = None) -> Config:
             model=model_catalogue["gpt-3.5-turbo"],
             tokens_per_minute=10000000,
         ),
+        "gpt-4-10m": OpenAIDeployment(
+            name="gpt-4-10m",
+            model=model_catalogue["gpt-4"],
+            tokens_per_minute=10000000,
+        ),
     }
     config.extension_path = extension_path
     return config
@@ -264,3 +273,240 @@ async def test_using_unsupported_model_for_completions_returns_400():
             e.value.message
             == "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, deployment1. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}"
         )
+
+
+@pytest.mark.asyncio
+async def test_response_format_not_present_gives_plain_text():
+    """
+    Ensure responses without a response_format in the request default to plain text
+    """
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        max_tokens = 50
+        response = aoai_client.chat.completions.create(model="gpt-4-10m", messages=messages, max_tokens=max_tokens)
+
+        assert len(response.choices) == 1
+        assert len(response.choices[0].message.content) > 20
+        assert response.choices[0].message.content[0] != "{"
+
+
+@pytest.mark.asyncio
+async def test_response_format_text_gives_plain_text():
+    """
+    Ensure responses with response_format of "text" in the request return plain text
+    """
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        options = {"max_tokens": 50, "response_format": {"type": "text"}}  # explicit "text" format under test
+        response = aoai_client.chat.completions.create(model="gpt-4-10m", messages=messages, **options)
+
+        assert len(response.choices) == 1
+        assert len(response.choices[0].message.content) > 20
+        assert response.choices[0].message.content[0] != "{"
+
+
+@pytest.mark.asyncio
+async def test_response_format_invalid_schema_gives_error():
+    """
+    Ensure responses with response_format of "something_invalid" in the request get an error response
+    """
+
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        with pytest.raises(BadRequestError) as e:
+            aoai_client.chat.completions.create(
+                model="gpt-4-10m",
+                messages=messages,
+                max_tokens=1000,
+                response_format={
+                    "type": "something_invalid",
+                },
+            )
+
+        assert e.value.status_code == 400
+        assert "Unsupported response_format type: something_invalid" in e.value.message
+
+
+@pytest.mark.asyncio
+async def test_response_format_json_schema_gives_json_pydantic():
+    """
+    Ensure responses with response_format of "json_schema" in the request return json
+    using the OpenAI SDK pydantic helpers
+    """
+
+    class CalendarEvent(BaseModel):
+        name: str
+        date: str
+        participants: list[str]
+
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        response = aoai_client.beta.chat.completions.parse(
+            model="gpt-4-10m", messages=messages, max_tokens=1000, response_format=CalendarEvent
+        )
+
+        assert len(response.choices) == 1
+        assert len(response.choices[0].message.content) > 20
+        assert response.choices[0].message.content[0] == "{", "expected json response"
+        assert response.choices[0].message.parsed is not None
+        assert isinstance(response.choices[0].message.parsed.name, str)
+        assert isinstance(response.choices[0].message.parsed.date, str)
+        assert not isinstance(response.choices[0].message.parsed.participants, str)
+        assert isinstance(response.choices[0].message.parsed.participants, collections.abc.Sequence)
+
+
+@pytest.mark.asyncio
+async def test_response_format_json_schema_gives_json_manual():
+    """
+    Ensure responses with response_format of "json_schema" in the request return json
+    using manual schema specification
+    """
+
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        response = aoai_client.chat.completions.create(
+            model="gpt-4-10m",
+            messages=messages,
+            max_tokens=1000,
+            response_format={
+                "type": "json_schema",
+                "json_schema": {
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "date": {"type": "string"},
+                            "participants": {"type": "array", "items": {"type": "string"}},
+                        },
+                        "required": ["name", "date", "participants"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+        )
+
+        assert len(response.choices) == 1
+        assert len(response.choices[0].message.content) > 20
+        assert response.choices[0].message.content[0] == "{", "expected json response"
+        json_content = json.loads(response.choices[0].message.content)
+        assert isinstance(json_content["name"], str)
+        assert isinstance(json_content["date"], str)
+        assert not isinstance(json_content["participants"], str)
+        assert isinstance(json_content["participants"], collections.abc.Sequence)
+
+
+@pytest.mark.asyncio
+async def test_response_format_json_schema_with_invalid_model_gives_error():
+    """
+    Ensure json_schema isn't used with models that don't support it
+    """
+
+    class CalendarEvent(BaseModel):
+        name: str
+        date: str
+        participants: list[str]
+
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+
+        with pytest.raises(BadRequestError) as e:
+            messages = [{"role": "user", "content": "What is the meaning of life?"}]
+            aoai_client.beta.chat.completions.parse(
+                # Use GPT 3.5 which doesn't support json_schema
+                model="gpt-3.5-10m",
+                messages=messages,
+                max_tokens=1000,
+                response_format=CalendarEvent,
+            )
+
+        assert e.value.status_code == 400
+        assert "'response_format' of type 'json_schema' is not supported with this model" in e.value.message
+
+
+@pytest.mark.asyncio
+async def test_response_format_json_schema_with_jsf_provider_gives_error():
+    """
+    Ensure responses with a schema using the $provider hints in JSF are rejected.
+    """
+
+    config = _get_generator_config()
+    server = UvicornTestServer(config)
+    with server.run_in_thread():
+        aoai_client = AzureOpenAI(
+            api_key=API_KEY,
+            api_version="2023-12-01-preview",
+            azure_endpoint=ENDPOINT,
+            max_retries=0,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        with pytest.raises(BadRequestError) as e:
+            aoai_client.chat.completions.create(
+                model="gpt-4-10m",
+                messages=messages,
+                max_tokens=1000,
+                response_format={
+                    "type": "json_schema",
+                    "json_schema": {
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "name": {"type": "string", "$provider": "faker.name"},
+                                "date": {"type": "string"},
+                                "participants": {"type": "array", "items": {"type": "string"}},
+                            },
+                            "required": ["name", "date", "participants"],
+                            "additionalProperties": False,
+                        },
+                    },
+                },
+            )
+
+        assert e.value.status_code == 400
+        assert "$provider is not allowed in JSON Schema" in e.value.message