diff --git a/SimplerLLM/language/llm.py b/SimplerLLM/language/llm.py
index 7ed8a82..9607577 100644
--- a/SimplerLLM/language/llm.py
+++ b/SimplerLLM/language/llm.py
@@ -3,6 +3,8 @@
 import SimplerLLM.language.llm_providers.anthropic_llm as anthropic_llm
 import SimplerLLM.language.llm_providers.ollama_llm as ollama_llm
 import SimplerLLM.language.llm_providers.lwh_llm as lwh_llm
+import SimplerLLM.language.llm_providers.azureopenai_llm as azureopenai_llm
+
 from SimplerLLM.prompts.messages_template import MessagesTemplate
 from enum import Enum
 import os
@@ -14,6 +16,7 @@ class LLMProvider(Enum):
     ANTHROPIC = 3
     OLLAMA = 4
     LWH = 5
+    AZUREOPENAI = 6
 
 
 class LLM:
@@ -53,6 +56,9 @@ def create(
             return OllamaLLM(provider, model_name, temperature, top_p)
         if provider == LLMProvider.LWH:
             return LwhLLM(provider, model_name, temperature, top_p, api_key, user_id)
+        if provider == LLMProvider.AZUREOPENAI:
+            return AzureOpenAILLM(provider, model_name, temperature, top_p, api_key)
+
         else:
             return None
 
@@ -69,6 +75,100 @@ def prepare_params(self, model_name, temperature, top_p):
             "top_p": top_p if top_p else self.top_p,
         }
 
+class AzureOpenAILLM(LLM):
+    def __init__(self, provider, model_name, temperature, top_p, api_key):
+        super().__init__(provider, model_name, temperature, top_p, api_key)
+        self.api_key = api_key or os.getenv("AZUREOPENAI_API_KEY", "")
+
+    def append_messages(self, system_prompt: str, messages: list):
+        model_messages = [{"role": "system", "content": system_prompt}]
+        if messages:
+            model_messages.extend(messages)
+        return model_messages
+
+    def generate_response(
+        self,
+        model_name: str = None,
+        prompt: str = None,
+        messages: list = None,
+        system_prompt: str = "You are a helpful AI Assistant",
+        temperature: float = 0.7,
+        max_tokens: int = 300,
+        top_p: float = 1.0,
+        full_response: bool = False,
+    ):
+        params = self.prepare_params(model_name, temperature, top_p)
+
+        # Validate inputs
+        if prompt and messages:
+            raise ValueError("Only one of 'prompt' or 'messages' should be provided.")
+        if not prompt and not messages:
+            raise ValueError("Either 'prompt' or 'messages' must be provided.")
+
+        # Prepare messages based on input type
+        if prompt:
+            model_messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt},
+            ]
+        if messages:
+            model_messages = self.append_messages(system_prompt, messages)
+
+        params.update(
+            {
+                "api_key": self.api_key,
+                "messages": model_messages,
+                "max_tokens": max_tokens,
+                "full_response": full_response,
+            }
+        )
+        return azureopenai_llm.generate_response(**params)
+
+    async def generate_response_async(
+        self,
+        model_name: str = None,
+        prompt: str = None,
+        messages: list = None,
+        system_prompt: str = "You are a helpful AI Assistant",
+        temperature: float = 0.7,
+        max_tokens: int = 300,
+        top_p: float = 1.0,
+        full_response: bool = False,
+    ):
+        params = self.prepare_params(model_name, temperature, top_p)
+
+        # Validate inputs
+        if prompt and messages:
+            raise ValueError("Only one of 'prompt' or 'messages' should be provided.")
+        if not prompt and not messages:
+            raise ValueError("Either 'prompt' or 'messages' must be provided.")
+
+        # Prepare messages based on input type
+        if prompt:
+            model_messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt},
+            ]
+        if messages:
+            model_messages = self.append_messages(system_prompt, messages)
+
+        params.update(
+            {
+                "api_key": self.api_key,
+                "messages": model_messages,
+                "max_tokens": max_tokens,
+                "full_response": full_response,
+            }
+        )
+        return await azureopenai_llm.generate_response_async(**params)
 
@@ -563,4 +663,4 @@ async def generate_response_async(
                 "max_tokens" : max_tokens
             }
         )
-    return await lwh_llm.generate_response_async(**params)
\ No newline at end of file
+    return await lwh_llm.generate_response_async(**params)
diff --git a/SimplerLLM/language/llm_providers/azureopenai_llm.py b/SimplerLLM/language/llm_providers/azureopenai_llm.py
new file mode 100644
index 0000000..682c305
--- /dev/null
+++ b/SimplerLLM/language/llm_providers/azureopenai_llm.py
@@ -0,0 +1,211 @@
+# add streaming
+from openai import AsyncAzureOpenAI
+from openai import AzureOpenAI
+from dotenv import load_dotenv
+import asyncio
+import os
+import time
+from .llm_response_models import LLMFullResponse, LLMEmbeddingsResponse
+
+# Load environment variables
+load_dotenv()
+
+MAX_RETRIES = int(os.getenv("MAX_RETRIES", 3))
+RETRY_DELAY = int(os.getenv("RETRY_DELAY", 2))
+
+
+def generate_response(
+    model_name,
+    messages=None,
+    temperature=0.7,
+    max_tokens=300,
+    top_p=1.0,
+    full_response=False,
+    api_key=None,
+):
+    start_time = time.time() if full_response else None
+    openai_client = AzureOpenAI(
+        api_key=api_key,
+        api_version="2024-07-01-preview",
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+    )
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            completion = openai_client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                top_p=top_p,
+            )
+            generated_text = completion.choices[0].message.content
+
+            if full_response:
+                end_time = time.time()
+                process_time = end_time - start_time
+                return LLMFullResponse(
+                    generated_text=generated_text,
+                    model=model_name,
+                    process_time=process_time,
+                    llm_provider_response=completion,
+                )
+            return generated_text
+
+        except Exception as e:
+            if attempt < MAX_RETRIES - 1:
+                time.sleep(RETRY_DELAY * (2**attempt))
+            else:
+                error_msg = f"Failed after {MAX_RETRIES} attempts"
+                if full_response:
+                    end_time = time.time()
+                    process_time = end_time - start_time
+                    error_msg += f" and {process_time} seconds"
+                error_msg += f" due to: {e}"
+                print(error_msg)
+                return None
+
+
+async def generate_response_async(
+    model_name,
+    messages=None,
+    temperature=0.7,
+    max_tokens=300,
+    top_p=1.0,
+    full_response=False,
+    api_key=None,
+):
+    start_time = time.time() if full_response else None
+    async_openai_client = AsyncAzureOpenAI(
+        api_key=api_key,
+        api_version="2024-07-01-preview",
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+    )
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            completion = await async_openai_client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                top_p=top_p,
+            )
+            generated_text = completion.choices[0].message.content
+
+            if full_response:
+                end_time = time.time()
+                process_time = end_time - start_time
+                return LLMFullResponse(
+                    generated_text=generated_text,
+                    model=model_name,
+                    process_time=process_time,
+                    llm_provider_response=completion,
+                )
+            return generated_text
+
+        except Exception as e:
+            if attempt < MAX_RETRIES - 1:
+                await asyncio.sleep(RETRY_DELAY * (2**attempt))
+            else:
+                error_msg = f"Failed after {MAX_RETRIES} attempts"
+                if full_response:
+                    end_time = time.time()
+                    process_time = end_time - start_time
+                    error_msg += f" and {process_time} seconds"
+                error_msg += f" due to: {e}"
+                print(error_msg)
+                return None
+
+
+def generate_embeddings(
+    model_name,
+    user_input=None,
+    full_response=False,
+    api_key=None,
+):
+    if not user_input:
+        raise ValueError("user_input must be provided.")
+
+    start_time = time.time() if full_response else None
+
+    openai_client = AzureOpenAI(
+        api_key=api_key,
+        api_version="2024-07-01-preview",
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+    )
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = openai_client.embeddings.create(
+                model=model_name,
+                input=user_input,
+            )
+            generated_embeddings = response.data
+
+            if full_response:
+                end_time = time.time()
+                process_time = end_time - start_time
+                return LLMEmbeddingsResponse(
+                    generated_embedding=generated_embeddings,
+                    model=model_name,
+                    process_time=process_time,
+                    llm_provider_response=response,
+                )
+            return generated_embeddings
+
+        except Exception as e:
+            if attempt < MAX_RETRIES - 1:
+                time.sleep(RETRY_DELAY * (2**attempt))
+            else:
+                error_msg = f"Failed after {MAX_RETRIES} attempts"
+                if full_response:
+                    end_time = time.time()
+                    process_time = end_time - start_time
+                    error_msg += f" and {process_time} seconds"
+                error_msg += f" due to: {e}"
+                print(error_msg)
+                return None
+
+
+async def generate_embeddings_async(
+    model_name,
+    user_input=None,
+    full_response=False,
+    api_key=None,
+):
+    if not user_input:
+        raise ValueError("user_input must be provided.")
+
+    start_time = time.time() if full_response else None
+
+    async_openai_client = AsyncAzureOpenAI(
+        api_key=api_key,
+        api_version="2024-07-01-preview",
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+    )
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            result = await async_openai_client.embeddings.create(
+                model=model_name,
+                input=user_input,
+            )
+            generated_embeddings = result.data
+
+            if full_response:
+                end_time = time.time()
+                process_time = end_time - start_time
+                return LLMEmbeddingsResponse(
+                    generated_embedding=generated_embeddings,
+                    model=model_name,
+                    process_time=process_time,
+                    llm_provider_response=result,
+                )
+            return generated_embeddings
+
+        except Exception as e:
+            if attempt < MAX_RETRIES - 1:
+                await asyncio.sleep(RETRY_DELAY * (2**attempt))
+            else:
+                error_msg = f"Failed after {MAX_RETRIES} attempts"
+                if full_response:
+                    end_time = time.time()
+                    process_time = end_time - start_time
+                    error_msg += f" and {process_time} seconds"
+                error_msg += f" due to: {e}"
+                print(error_msg)
+                return None
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 3a96a0f..cf458ce 100644
--- a/readme.md
+++ b/readme.md
@@ -47,6 +47,9 @@
 VALUE_SERP_API_KEY="your_value_serp_api_key_here" #for Google search
 SERPER_API_KEY="your_serper_api_key_here" #for Google search
 STABILITY_API_KEY="your_stability_api_key_here" #for image generation
+AZUREOPENAI_API_KEY="your_azureopenai_api_key_here"
+AZURE_OPENAI_ENDPOINT="your_azureopenai_endpoint_url_here"
+
 ```
 
 ### Creating an LLM Instance
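
Usage sketch (not part of the patch): a minimal example of how the new provider is expected to be wired up through the existing `LLM.create` factory, assuming the `AZUREOPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` variables added to the readme are set in `.env`. The model name below is a placeholder; for Azure OpenAI it should be the name of your deployment.

```python
from SimplerLLM.language.llm import LLM, LLMProvider

# Placeholder deployment name; replace with your Azure OpenAI deployment.
llm = LLM.create(
    provider=LLMProvider.AZUREOPENAI,
    model_name="my-gpt4o-deployment",
)

# Returns the generated text, or None once MAX_RETRIES attempts are exhausted.
text = llm.generate_response(prompt="Say hello in one sentence.")
print(text)
```

As with the other providers, `messages=[...]` can be passed instead of `prompt` (but not both), and `full_response=True` returns an `LLMFullResponse` carrying the model name, processing time, and the raw provider response.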