From 59c48d522e2cdf21b41c3a66cab7803a1713721b Mon Sep 17 00:00:00 2001
From: Winston Chang
Date: Tue, 11 Jul 2023 22:43:51 -0500
Subject: [PATCH 1/3] Add endpoint_type parameter

---
 chatstream/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/chatstream/__init__.py b/chatstream/__init__.py
index d6b6806..5f2c8c8 100644
--- a/chatstream/__init__.py
+++ b/chatstream/__init__.py
@@ -177,6 +177,7 @@ def __init__(
         answer_preprocessor: Callable[[str], ui.TagChild]
         | Callable[[str], Awaitable[ui.TagChild]]
         | None = None,
+        endpoint_type: Literal["openai", "azure"] = "openai",
         debug: bool = False,
     ):
         self.input = input
@@ -210,6 +211,8 @@ def __init__(
             answer_preprocessor = lambda x: ui.markdown(x)
         self.answer_preprocessor = wrap_async(answer_preprocessor)
 
+        self.endpoint_type = endpoint_type
+
         self.print_request = debug
 
         # This contains a tuple of the most recent messages when a streaming response is
@@ -338,13 +341,18 @@ async def perform_query():
             if self.url() is not None:
                 extra_kwargs["url"] = self.url()
 
+            # Azure uses deployment_name instead of model.
+            if self.endpoint_type == "openai":
+                extra_kwargs["model"] = self.model()
+            elif self.endpoint_type == "azure":
+                extra_kwargs["deployment_name"] = self.model()
+
             # Launch a Task that updates the chat string asynchronously. We run this in
             # a separate task so that the data can come in without need to await it in
             # this Task (which would block other computation to happen, like running
             # reactive stuff).
             messages: StreamResult[ChatCompletionStreaming] = stream_to_reactive(
                 openai.ChatCompletion.acreate(  # pyright: ignore[reportUnknownMemberType, reportGeneralTypeIssues]
-                    model=self.model(),
                     api_key=self.api_key(),
                     messages=outgoing_messages_normalized,
                     stream=True,

From 2928b062b485ff645557cca1e1642860efe7b347 Mon Sep 17 00:00:00 2001
From: Winston Chang
Date: Wed, 12 Jul 2023 17:09:15 -0500
Subject: [PATCH 2/3] Add mapping from Azure to OpenAI model names

---
 chatstream/__init__.py     | 17 +++++++++++++----
 chatstream/openai_types.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/chatstream/__init__.py b/chatstream/__init__.py
index 5f2c8c8..4d8da70 100644
--- a/chatstream/__init__.py
+++ b/chatstream/__init__.py
@@ -35,10 +35,12 @@
 from shiny import Inputs, Outputs, Session, module, reactive, render, ui
 
 from .openai_types import (
+    AzureOpenAiModel,
     ChatCompletionStreaming,
     ChatMessage,
     OpenAiModel,
     openai_model_context_limits,
+    openai_model_name,
 )
 
 if "pyodide" in sys.modules:
@@ -142,6 +144,10 @@ class chat_server:
         from the AI assistant before it is displayed in the chat UI. Note that this is
         run on streaming data. As each piece of streaming data comes in, the entire
         accumulated string is run through this function.
+    endpoint_type
+        Either "openai" or "azure", for OpenAI and Azure-OpenAI endpoints, respectively.
+        Azure uses the OpenAI API, but with some slight changes, and so if you are
+        using Azure, you must set this to "azure".
     debug
         Whether to print debugging information to the console.
 
@@ -163,7 +169,9 @@ def __init__(
         output: Outputs,
         session: Session,
         *,
-        model: OpenAiModel | Callable[[], OpenAiModel] = DEFAULT_MODEL,
+        model: OpenAiModel
+        | AzureOpenAiModel
+        | Callable[[], OpenAiModel | AzureOpenAiModel] = DEFAULT_MODEL,
         api_key: str | Callable[[], str] | None = None,
         url: str | Callable[[], str] | None = None,
         system_prompt: str | Callable[[], str] = DEFAULT_SYSTEM_PROMPT,
@@ -187,7 +195,7 @@ def __init__(
 
         # Ensure these are functions, even if we were passed static values.
         self.model = cast(  # pyright needs a little help with this.
-            Callable[[], OpenAiModel],
+            Callable[[], OpenAiModel | AzureOpenAiModel],
             wrap_function_nonreactive(model),
         )
         if api_key is None:
@@ -318,7 +326,7 @@ async def perform_query():
             # Count tokens, going backward.
             outgoing_messages: list[ChatMessageEnriched] = []
             tokens_total = self._system_prompt_message()["token_count"]
-            max_tokens = openai_model_context_limits[self.model()]
+            max_tokens = openai_model_context_limits[openai_model_name(self.model())]
             for message in reversed(session_messages2):
                 if tokens_total + message["token_count"] > max_tokens:
                     break
@@ -487,7 +495,8 @@ def get_env_var_api_key() -> str:
     return key
 
 
-def get_token_count(s: str, model: OpenAiModel) -> int:
+def get_token_count(s: str, model: OpenAiModel | AzureOpenAiModel) -> int:
+    model = openai_model_name(model)
     encoding = tiktoken.encoding_for_model(model)
     return len(encoding.encode(s))
 
diff --git a/chatstream/openai_types.py b/chatstream/openai_types.py
index f790cf8..35d7039 100644
--- a/chatstream/openai_types.py
+++ b/chatstream/openai_types.py
@@ -15,6 +15,24 @@
     "gpt-4-32k-0314",
 ]
 
+# Azure has different names for models:
+# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models
+AzureOpenAiModel = Literal[
+    "gpt-35-turbo",
+    "gpt-35-turbo-16k",
+    "gpt-4",
+    "gpt-4-32k",
+]
+
+# Mapping from Azure OpenAI model names to OpenAI model names
+azure_openai_model_mapping: dict[AzureOpenAiModel, OpenAiModel] = {
+    "gpt-35-turbo": "gpt-3.5-turbo",
+    "gpt-35-turbo-16k": "gpt-3.5-turbo-16k",
+    "gpt-4": "gpt-4",
+    "gpt-4-32k": "gpt-4-32k",
+}
+
+
 openai_model_context_limits: dict[OpenAiModel, int] = {
     "gpt-3.5-turbo": 4096,
     "gpt-3.5-turbo-16k": 16384,
@@ -74,3 +92,21 @@ class ChatCompletionNonStreaming(TypedDict):
 class ChatCompletionStreaming(ChatCompletionBase):
     object: Literal["chat.completion.chunk"]
     choices: list[ChoiceStreaming]
+
+
+def openai_model_name(model: OpenAiModel | AzureOpenAiModel) -> OpenAiModel:
+    """Given an OpenAI or Azure OpenAI model name, return the OpenAI model name.
+
+    OpenAI and Azure OpenAI have different names for the same models. This function
+    converts from Azure OpenAI model names to OpenAI model names.
+
+    Args:
+        model: An OpenAI or Azure OpenAI model name.
+
+    Returns:
+        : An OpenAI model name
+    """
+    if model in azure_openai_model_mapping:
+        return azure_openai_model_mapping[model]
+    else:
+        return model  # pyright: ignore[reportGeneralTypeIssues]

From d5b2e0776a7f41a0ca8a338172ae802e5b3233bb Mon Sep 17 00:00:00 2001
From: Winston Chang
Date: Wed, 19 Jul 2023 00:36:39 -0500
Subject: [PATCH 3/3] Fix Azure endpoint communication

---
 chatstream/__init__.py     | 42 +++++++++++++++++++---------------------
 chatstream/openai_types.py | 35 -----------------------------------
 2 files changed, 21 insertions(+), 56 deletions(-)

diff --git a/chatstream/__init__.py b/chatstream/__init__.py
index 4d8da70..d19f97c 100644
--- a/chatstream/__init__.py
+++ b/chatstream/__init__.py
@@ -35,12 +35,10 @@
 from shiny import Inputs, Outputs, Session, module, reactive, render, ui
 
 from .openai_types import (
-    AzureOpenAiModel,
     ChatCompletionStreaming,
     ChatMessage,
     OpenAiModel,
     openai_model_context_limits,
-    openai_model_name,
 )
 
 if "pyodide" in sys.modules:
@@ -113,6 +111,10 @@ class chat_server:
     ----------
     model
         OpenAI model to use. Can be a string or a function that returns a string.
+    azure_deployment_id
+        Azure deployment ID to use (optional). Azure supports the OpenAI API, but
+        with some slight changes. If you are using Azure, you must set this to your
+        deployment ID. Can be a string or a function that returns a string.
     api_key
         OpenAI API key to use (optional). Can be a string or a function that returns a
         string, or `None`. If `None`, then it will use the `OPENAI_API_KEY` environment
@@ -126,7 +128,7 @@ class chat_server:
     temperature
         Temperature to use. Can be a float or a function that returns a float.
     text_input_placeholder
-        Placeholder teext to use for the text input. Can be a string or a function that
+        Placeholder text to use for the text input. Can be a string or a function that
         returns a string, or `None` for no placeholder.
     throttle
         Throttle interval to use for incoming streaming messages. Can be a float or a
@@ -144,10 +146,6 @@ class chat_server:
         from the AI assistant before it is displayed in the chat UI. Note that this is
         run on streaming data. As each piece of streaming data comes in, the entire
         accumulated string is run through this function.
-    endpoint_type
-        Either "openai" or "azure", for OpenAI and Azure-OpenAI endpoints, respectively.
-        Azure uses the OpenAI API, but with some slight changes, and so if you are
-        using Azure, you must set this to "azure".
     debug
         Whether to print debugging information to the console.
 
@@ -169,9 +167,8 @@ def __init__(
         output: Outputs,
         session: Session,
         *,
-        model: OpenAiModel
-        | AzureOpenAiModel
-        | Callable[[], OpenAiModel | AzureOpenAiModel] = DEFAULT_MODEL,
+        model: OpenAiModel | Callable[[], OpenAiModel] = DEFAULT_MODEL,
+        azure_deployment_id: str | Callable[[], str] | None = None,
         api_key: str | Callable[[], str] | None = None,
         url: str | Callable[[], str] | None = None,
         system_prompt: str | Callable[[], str] = DEFAULT_SYSTEM_PROMPT,
@@ -185,7 +182,6 @@ def __init__(
         answer_preprocessor: Callable[[str], ui.TagChild]
         | Callable[[str], Awaitable[ui.TagChild]]
         | None = None,
-        endpoint_type: Literal["openai", "azure"] = "openai",
         debug: bool = False,
     ):
         self.input = input
@@ -195,9 +191,15 @@ def __init__(
 
         # Ensure these are functions, even if we were passed static values.
         self.model = cast(  # pyright needs a little help with this.
-            Callable[[], OpenAiModel | AzureOpenAiModel],
+            Callable[[], OpenAiModel],
             wrap_function_nonreactive(model),
         )
+
+        if azure_deployment_id is None:
+            self.azure_deployment_id = None
+        else:
+            self.azure_deployment_id = wrap_function_nonreactive(azure_deployment_id)
+
         if api_key is None:
             self.api_key = get_env_var_api_key
         else:
@@ -219,8 +221,6 @@ def __init__(
             answer_preprocessor = lambda x: ui.markdown(x)
         self.answer_preprocessor = wrap_async(answer_preprocessor)
 
-        self.endpoint_type = endpoint_type
-
         self.print_request = debug
 
         # This contains a tuple of the most recent messages when a streaming response is
@@ -326,7 +326,7 @@ async def perform_query():
             # Count tokens, going backward.
             outgoing_messages: list[ChatMessageEnriched] = []
             tokens_total = self._system_prompt_message()["token_count"]
-            max_tokens = openai_model_context_limits[openai_model_name(self.model())]
+            max_tokens = openai_model_context_limits[self.model()]
             for message in reversed(session_messages2):
                 if tokens_total + message["token_count"] > max_tokens:
                     break
@@ -349,11 +349,12 @@ async def perform_query():
             if self.url() is not None:
                 extra_kwargs["url"] = self.url()
 
-            # Azure uses deployment_name instead of model.
-            if self.endpoint_type == "openai":
-                extra_kwargs["model"] = self.model()
-            elif self.endpoint_type == "azure":
-                extra_kwargs["deployment_name"] = self.model()
+            if self.azure_deployment_id is not None:
+                # Azure-OpenAI uses deployment_id instead of model.
+                extra_kwargs["deployment_id"] = self.azure_deployment_id()
+            else:
+                # OpenAI just uses model.
+                extra_kwargs["model"] = self.model()
 
             # Launch a Task that updates the chat string asynchronously. We run this in
             # a separate task so that the data can come in without need to await it in
@@ -495,8 +496,7 @@ def get_env_var_api_key() -> str:
     return key
 
 
-def get_token_count(s: str, model: OpenAiModel | AzureOpenAiModel) -> int:
-    model = openai_model_name(model)
+def get_token_count(s: str, model: OpenAiModel) -> int:
     encoding = tiktoken.encoding_for_model(model)
     return len(encoding.encode(s))
 
diff --git a/chatstream/openai_types.py b/chatstream/openai_types.py
index 35d7039..bca06fc 100644
--- a/chatstream/openai_types.py
+++ b/chatstream/openai_types.py
@@ -15,23 +15,6 @@
     "gpt-4-32k-0314",
 ]
 
-# Azure has different names for models:
-# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models
-AzureOpenAiModel = Literal[
-    "gpt-35-turbo",
-    "gpt-35-turbo-16k",
-    "gpt-4",
-    "gpt-4-32k",
-]
-
-# Mapping from Azure OpenAI model names to OpenAI model names
-azure_openai_model_mapping: dict[AzureOpenAiModel, OpenAiModel] = {
-    "gpt-35-turbo": "gpt-3.5-turbo",
-    "gpt-35-turbo-16k": "gpt-3.5-turbo-16k",
-    "gpt-4": "gpt-4",
-    "gpt-4-32k": "gpt-4-32k",
-}
-
 
 openai_model_context_limits: dict[OpenAiModel, int] = {
     "gpt-3.5-turbo": 4096,
@@ -92,21 +75,3 @@ class ChatCompletionNonStreaming(TypedDict):
 class ChatCompletionStreaming(ChatCompletionBase):
     object: Literal["chat.completion.chunk"]
     choices: list[ChoiceStreaming]
-
-
-def openai_model_name(model: OpenAiModel | AzureOpenAiModel) -> OpenAiModel:
-    """Given an OpenAI or Azure OpenAI model name, return the OpenAI model name.
-
-    OpenAI and Azure OpenAI have different names for the same models. This function
-    converts from Azure OpenAI model names to OpenAI model names.
-
-    Args:
-        model: An OpenAI or Azure OpenAI model name.
-
-    Returns:
-        : An OpenAI model name
-    """
-    if model in azure_openai_model_mapping:
-        return azure_openai_model_mapping[model]
-    else:
-        return model  # pyright: ignore[reportGeneralTypeIssues]
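
For reference, the sketch below shows how an app might opt in to the Azure endpoint once PATCH 3/3 is applied. It is a minimal usage sketch, not part of the patches: it assumes chatstream's module-style chat_ui()/chat_server() API, and the deployment name, resource URL, and API key shown are hypothetical placeholders. Note that model is still passed alongside azure_deployment_id, because the patched code keeps using it locally for tokenization and context-window limits, while the outgoing request carries deployment_id instead of model.

from shiny import App, Inputs, Outputs, Session, ui

import chatstream

app_ui = ui.page_fixed(
    chatstream.chat_ui("mychat"),
)


def server(input: Inputs, output: Outputs, session: Session):
    chatstream.chat_server(
        "mychat",
        # Still used locally to pick the tiktoken encoding and the
        # context-window limit; with Azure it is not sent in the request.
        model="gpt-3.5-turbo",
        # New in this patch series: when set, the request is sent with
        # deployment_id=... instead of model=... (hypothetical name).
        azure_deployment_id="my-gpt35-deployment",
        # Hypothetical Azure resource URL, forwarded through the existing
        # `url` parameter.
        url="https://my-resource.openai.azure.com/",
        api_key="...",  # or rely on the OPENAI_API_KEY environment variable
    )


app = App(app_ui, server)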