diff --git a/.capy/pr-body-cap-2-41193abd.md b/.capy/pr-body-cap-2-41193abd.md new file mode 100644 index 0000000..20dcaa5 --- /dev/null +++ b/.capy/pr-body-cap-2-41193abd.md @@ -0,0 +1,32 @@ +## Summary +Migrate Emplode remote model invocation to LiteLLM OpenAI Responses API and set default models to GPT‑5 family with high reasoning effort. + +## Changes +- Default model set to `gpt-5`. +- `emplode --fast` now uses `gpt-5-nano`. +- Switched remote (non-local) inference from streaming Chat Completions to the Responses API via LiteLLM, with: + - `reasoning: { effort: "high" }` for both default and fast modes. + - `max_output_tokens` wired to Emplode’s `max_tokens`. + - Custom `run_code` tool defined for function-calling. +- Updated CLI help text and user-facing messages from GPT-4/4o to GPT-5. +- Local mode behavior remains unchanged. + +## Nature of change +Enhancement / Refactor (API migration + model defaults). + +## Impact +- Non-breaking for local mode. +- Remote mode now uses the Responses API non-streaming flow. Assistant text and any `run_code` tool calls are parsed and executed as before. If streaming is required later, we can enable `stream=True` with the Responses API. +- Azure path continues to work via `model=f"azure/{self.azure_deployment_name}"` using the existing environment variables. + +## Why +- Align Emplode with the newer OpenAI Responses API through LiteLLM for better support of reasoning settings and future features. +- Standardize on GPT‑5 family and expose a fast option (`gpt-5-nano`) while keeping reasoning quality high. + +## Configuration notes +- OPENAI_API_KEY required for OpenAI. +- Optional: `--api_base` for custom OpenAI-compatible backends (uses `custom/` path). +- Azure: `AZURE_API_KEY` or `OPENAI_API_KEY`, `AZURE_API_BASE`, `AZURE_API_VERSION`, `AZURE_DEPLOYMENT_NAME` (wired to Responses API). 
+ + +₍ᐢ•(ܫ)•ᐢ₎ Generated by [Capy](https://capy.ai) ([view task](https://capy.ai/project/745bca9f-f0c1-4eec-a38c-65f51326313d/task/41193abd-f8aa-4b79-ac4d-8907503cb7cd)) \ No newline at end of file diff --git a/emplode/cli.py b/emplode/cli.py index ad170d0..10ebe73 100644 --- a/emplode/cli.py +++ b/emplode/cli.py @@ -43,7 +43,7 @@ def cli(emplode): '--fast', action='store_true', default=FAST_MODE, - help='use gpt-4o-mini instead of gpt-4o') + help='use gpt-5-nano instead of gpt-5') parser.add_argument('-l', '--local', action='store_true', @@ -103,7 +103,7 @@ def cli(emplode): if args.yes: emplode.auto_run = True if args.fast: - emplode.model = "gpt-4o-mini" + emplode.model = "gpt-5-nano" if args.local and not args.falcon: rprint('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') diff --git a/emplode/emplode.py b/emplode/emplode.py index f30176c..8f41b9a 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -46,14 +46,14 @@ missing_api_key_message = """> OpenAI API key not found -To use `GPT-4o` (recommended) please provide an OpenAI API key. +To use `GPT-5` (recommended) please provide an OpenAI API key. To use `Code-Llama` (free but less capable) press `enter`. """ missing_azure_info_message = """> Azure OpenAI Service API info not found -To use `GPT-4` (recommended) please provide an Azure OpenAI API key, a API base, a deployment name and a API version. +To use `GPT-5` (recommended) please provide an Azure OpenAI API key, an API base, a deployment name and an API version. To use `Code-Llama` (free but less capable) press `enter`. 
""" @@ -73,7 +73,7 @@ def __init__(self): self.api_key = None self.auto_run = False self.local = False - self.model = "gpt-4o" + self.model = "gpt-5" self.debug_mode = False self.api_base = None self.context_window = 2000 @@ -264,12 +264,12 @@ def chat(self, message=None, return_messages=False): f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/emplodeai/emplode/\n```", f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**", "\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Emplode community Discord and consider contributing the project's development. )", - "\n\nPress enter to switch to `GPT-4o` (recommended)." + "\n\nPress enter to switch to `GPT-5` (recommended)." ]))) input() self.local = False - self.model = "gpt-4o" + self.model = "gpt-5" self.verify_api_key() welcome_message = "" @@ -470,7 +470,15 @@ def respond(self): if self.local: messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) else: - messages = tt.trim(self.messages, self.model, system_message=system_message) + try: + messages = tt.trim(self.messages, self.model, system_message=system_message) + except Exception: + try: + messages = tt.trim(self.messages, "gpt-4o", system_message=system_message) + except Exception: + remote_window = int(os.environ.get("EMPLODE_REMOTE_CONTEXT_WINDOW", "128000")) + budget = max(512, remote_window - self.max_tokens - 1000) + messages = tt.trim(self.messages, max_tokens=budget, system_message=system_message) if self.debug_mode: print("\n", "Sending `messages` to LLM:", "\n") @@ -478,83 +486,233 @@ def respond(self): print() if not self.local: - error = "" - - for _ in range(3): + for _ in range(3): try: - - if self.use_azure: 
- response = litellm.completion( - f"azure/{self.azure_deployment_name}", - messages=messages, - functions=[function_schema], - temperature=self.temperature, - stream=True, - ) - else: - if self.api_base: - response = litellm.completion( - api_base=self.api_base, - model = "custom/" + self.model, - messages=messages, - functions=[function_schema], - stream=True, - temperature=self.temperature, - ) + tools = [ + { + "type": "custom", + "name": "run_code", + "description": "Executes code on the user's machine. JSON args: {language: 'python'|'R'|'shell'|'applescript'|'javascript'|'html', code: string}" + } + ] + + def _to_responses_input(msgs): + resp = [] + for m in msgs: + role = m.get("role", "user") + content = m.get("content", "") + if role == "system": + role2 = "developer" + elif role == "function": + name = m.get("name", "run_code") + cont = m.get("content", "") + if not cont: + continue + content = f"[Tool {name} output]\n{cont}" + role2 = "user" else: - response = litellm.completion( - model=self.model, - messages=messages, - functions=[function_schema], - stream=True, - temperature=self.temperature, - ) - - break + role2 = role + if content is None: + content = "" + content = str(content) + if content.strip() == "": + continue + resp.append({"role": role2, "content": content}) + return resp + + responses_input = _to_responses_input(messages) + + if self.use_azure: + response = litellm.responses( + model=f"azure/{self.azure_deployment_name}", + input=responses_input, + tools=tools, + max_output_tokens=self.max_tokens, + reasoning={"effort": "high"} + ) + else: + if self.api_base: + response = litellm.responses( + api_base=self.api_base, + model="custom/" + self.model, + input=responses_input, + tools=tools, + max_output_tokens=self.max_tokens, + reasoning={"effort": "high"} + ) + else: + response = litellm.responses( + model=self.model, + input=responses_input, + tools=tools, + max_output_tokens=self.max_tokens, + reasoning={"effort": "high"} + ) + break 
except: - if self.debug_mode: - traceback.print_exc() - error = traceback.format_exc() - time.sleep(3) + if self.debug_mode: + traceback.print_exc() + error = traceback.format_exc() + time.sleep(3) else: raise Exception(error) - - elif self.local: - def messages_to_prompt(messages): + def extract_output_and_tool(resp): + output_text = "" + tool_args = None + try: + outputs = getattr(resp, 'output', None) + if outputs is None and isinstance(resp, dict): + outputs = resp.get('output') + if outputs: + for item in outputs: + typ = getattr(item, 'type', None) + if typ is None and isinstance(item, dict): + typ = item.get('type') + # collect text + try: + content_list = getattr(item, 'content', None) + if content_list is None and isinstance(item, dict): + content_list = item.get('content') + if isinstance(content_list, list): + for content in content_list: + if hasattr(content, 'text'): + output_text += getattr(content, 'text') or "" + elif isinstance(content, dict) and 'text' in content: + output_text += content.get('text') or "" + elif hasattr(item, 'text'): + output_text += getattr(item, 'text') or "" + except Exception: + pass + # detect tool call + name = getattr(item, 'name', None) + if name is None and isinstance(item, dict): + name = item.get('name') + if name == 'run_code' or (typ and 'tool' in str(typ).lower() and name): + data = getattr(item, 'input', None) + if data is None: + data = getattr(item, 'arguments', None) + if data is None and isinstance(item, dict): + data = item.get('input') or item.get('arguments') + if isinstance(data, str): + try: + tool_args = json.loads(data) + except Exception: + tool_args = None + elif isinstance(data, dict): + tool_args = data + except Exception: + if self.debug_mode: + traceback.print_exc() + return output_text, tool_args + + def extract_code_block(text): + if "```" not in text: + return None + parts = text.split("```") + # find last code block (odd index) + last = None + for i in range(len(parts)-1, 0, -1): + if i % 2 == 
1: + last = parts[i] + break + if last is None: + return None + lines = last.split("\n") + if len(lines) == 0: + return None + first = lines[0].strip() + if first == "": + language = "python" + if len(lines) > 1 and lines[1].startswith("pip"): + language = "shell" + else: + language = first + if language == "bash": + language = "shell" + code = "\n".join(lines[1:]).strip("` \n") + if not code: + return None + return {"language": language, "code": code} + + output_text, tool_args = extract_output_and_tool(response) + assistant_message = {"role": "assistant"} + if output_text: + assistant_message["content"] = output_text + code_args = tool_args or extract_code_block(output_text or "") + if code_args: + assistant_message["function_call"] = {"parsed_arguments": code_args} + self.messages.append(assistant_message) + + if "function_call" in assistant_message: + self.end_active_block() + if len(self.messages) >= 2: + last_role = self.messages[-2].get("role") + if last_role == "user" or last_role == "function": + print() + self.active_block = CodeBlock() + self.active_block.update_from_message(self.messages[-1]) + + if self.auto_run == False: + language = self.active_block.language + code = self.active_block.code + response_input = input(" Would you like to run this code? (y/n)\n\n ") + print("") + if response_input.strip().lower() != "y": + self.active_block.end() + self.messages.append({ + "role": "function", + "name": "run_code", + "content": "User decided not to run this code." 
+ }) + return + language = self.messages[-1]["function_call"]["parsed_arguments"].get("language") + if language not in self.code_emplodes: + self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) + code_emplode = self.code_emplodes[language] + code_emplode.active_block = self.active_block + code_emplode.run() + self.active_block.end() + self.messages.append({ + "role": "function", + "name": "run_code", + "content": self.active_block.output if self.active_block.output else "No output" + }) + self.respond() + return + else: + self.active_block = MessageBlock() + self.active_block.update_from_message(self.messages[-1]) + self.active_block.end() + return + else: + # local mode - keep existing local streaming behavior + def messages_to_prompt(messages): for message in messages: if "role" not in message: message["role"] = "assistant" - if "falcon" in self.model.lower(): - formatted_messages = "" for message in messages: formatted_messages += f"{message['role'].capitalize()}: {message['content']}\n" formatted_messages = formatted_messages.strip() - else: - system_prompt = messages[0]['content'] formatted_messages = f"[INST] <>\n{system_prompt}\n<>\n" - for index, item in enumerate(messages[1:]): - role = item['role'] - content = item['content'] - - if role == 'user': - formatted_messages += f"{content} [/INST] " - elif role == 'function': - formatted_messages += f"Output: {content} [/INST] " - elif role == 'assistant': - formatted_messages += f"{content} [INST] " - + role = item['role'] + content = item['content'] + if role == 'user': + formatted_messages += f"{content} [/INST] " + elif role == 'function': + formatted_messages += f"Output: {content} [/INST] " + elif role == 'assistant': + formatted_messages += f"{content} [INST] " if formatted_messages.endswith("[INST] "): - formatted_messages = formatted_messages[:-10] - + formatted_messages = formatted_messages[:-10] return formatted_messages prompt = messages_to_prompt(messages) @@ -565,7 +723,6 @@ def 
messages_to_prompt(messages): elif messages[-1]["role"] == "function" and messages[-1]["content"] == "No output": prompt += "Given the fact that the code I just ran produced no output, " - if self.debug_mode: import builtins builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt) @@ -575,70 +732,41 @@ def messages_to_prompt(messages): stream=True, temperature=self.temperature, stop=[""], - max_tokens=750 + max_tokens=750 ) - self.messages.append({}) - in_function_call = False - llama_function_call_finished = False - self.active_block = None - - for chunk in response: - if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0): - continue + self.messages.append({}) + in_function_call = False + llama_function_call_finished = False + self.active_block = None - if self.local: + for chunk in response: + if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0): + continue if "content" not in messages[-1]: chunk["choices"][0]["text"] = chunk["choices"][0]["text"].capitalize() messages[-1]["role"] = "assistant" delta = {"content": chunk["choices"][0]["text"]} - else: - delta = chunk["choices"][0]["delta"] - - self.messages[-1] = merge_deltas(self.messages[-1], delta) - - if not self.local: - condition = "function_call" in self.messages[-1] - elif self.local: + self.messages[-1] = merge_deltas(self.messages[-1], delta) if "content" in self.messages[-1]: condition = self.messages[-1]["content"].count("```") % 2 == 1 else: condition = False - - if condition: - if in_function_call == False: - - self.end_active_block() - - last_role = self.messages[-2]["role"] - if last_role == "user" or last_role == "function": - print() - - self.active_block = CodeBlock() - - in_function_call = True - - if not self.local: - if "arguments" in self.messages[-1]["function_call"]: - arguments = self.messages[-1]["function_call"]["arguments"] - new_parsed_arguments = parse_partial_json(arguments) - if new_parsed_arguments: - self.messages[-1]["function_call"][ - 
"parsed_arguments"] = new_parsed_arguments - - elif self.local: + if condition: + if in_function_call == False: + self.end_active_block() + last_role = self.messages[-2]["role"] + if last_role == "user" or last_role == "function": + print() + self.active_block = CodeBlock() + in_function_call = True if "content" in self.messages[-1]: - content = self.messages[-1]["content"] - if "```" in content: blocks = content.split("```") - current_code_block = blocks[-1] - lines = current_code_block.split("\n") - - if content.strip() == "```": + if content.strip() == "```": language = None else: if lines[0] != "": @@ -648,109 +776,62 @@ def messages_to_prompt(messages): if len(lines) > 1: if lines[1].startswith("pip"): language = "shell" - code = '\n'.join(lines[1:]).strip("` \n") - arguments = {"code": code} - if language: + if language: if language == "bash": language = "shell" arguments["language"] = language - if "function_call" not in self.messages[-1]: self.messages[-1]["function_call"] = {} - self.messages[-1]["function_call"]["parsed_arguments"] = arguments - - else: - if in_function_call == True: - - if self.local: - + else: + if in_function_call == True: llama_function_call_finished = True - - in_function_call = False - - if self.active_block == None: - - self.active_block = MessageBlock() - - self.active_block.update_from_message(self.messages[-1]) - - if chunk["choices"][0]["finish_reason"] or llama_function_call_finished: - if chunk["choices"][ - 0]["finish_reason"] == "function_call" or llama_function_call_finished: - - if self.debug_mode: - print("Running function:") - print(self.messages[-1]) - print("---") - - if self.auto_run == False: - - self.active_block.end() - language = self.active_block.language - code = self.active_block.code - - response = input(" Would you like to run this code? 
(y/n)\n\n ") - print("") - - if response.strip().lower() == "y": - self.active_block = CodeBlock() - self.active_block.language = language - self.active_block.code = code - - else: + in_function_call = False + if self.active_block == None: + self.active_block = MessageBlock() + self.active_block.update_from_message(self.messages[-1]) + if chunk["choices"][0]["finish_reason"] or llama_function_call_finished: + if llama_function_call_finished: + if self.debug_mode: + print("Running function:") + print(self.messages[-1]) + print("---") + if self.auto_run == False: self.active_block.end() - self.messages.append({ - "role": - "function", - "name": - "run_code", - "content": - "User decided not to run this code." - }) - return - - if not self.local and "parsed_arguments" not in self.messages[-1]["function_call"]: - + language = self.active_block.language + code = self.active_block.code + response_input = input(" Would you like to run this code? (y/n)\n\n ") + print("") + if response_input.strip().lower() != "y": + self.active_block.end() + self.messages.append({ + "role": "function", + "name": "run_code", + "content": "User decided not to run this code." + }) + return + language = self.messages[-1]["function_call"]["parsed_arguments"]["language"] + if language not in self.code_emplodes: + self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) + code_emplode = self.code_emplodes[language] + code_emplode.active_block = self.active_block + code_emplode.run() + self.active_block.end() self.messages.append({ "role": "function", "name": "run_code", - "content": """Your function call could not be parsed. Please use ONLY the `run_code` function, which takes two parameters: `code` and `language`. 
Your response should be formatted as a JSON.""" + "content": self.active_block.output if self.active_block.output else "No output" }) - self.respond() + if not llama_function_call_finished: + if "content" in self.messages[-1]: + self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#") + self.active_block.update_from_message(self.messages[-1]) + time.sleep(0.1) + self.active_block.end() return - language = self.messages[-1]["function_call"]["parsed_arguments"][ - "language"] - if language not in self.code_emplodes: - self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) - code_emplode = self.code_emplodes[language] - - code_emplode.active_block = self.active_block - code_emplode.run() - - self.active_block.end() - - self.messages.append({ - "role": "function", - "name": "run_code", - "content": self.active_block.output if self.active_block.output else "No output" - }) - - self.respond() - - if chunk["choices"][0]["finish_reason"] != "function_call": - - if self.local and "content" in self.messages[-1]: - self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#") - self.active_block.update_from_message(self.messages[-1]) - time.sleep(0.1) - - self.active_block.end() - return - def _print_welcome_message(self): print("", "", Markdown(f"\nWelcome to **Emplode**.\n"), "")