From 183cb2bb5514b81eced89287006a47d8036a06fc Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 07:09:53 +0000 Subject: [PATCH 1/9] Capy jam: Standardize on GPT-5 and remove LiteLLM to simplify model usage and future-proof the agent; update dependencies and docs accordingly Co-authored-by: Capy --- README.md | 40 +--- emplode/cli.py | 104 ---------- emplode/emplode.py | 446 +++++++----------------------------------- emplode/get_hf_llm.py | 291 --------------------------- pyproject.toml | 26 +-- 5 files changed, 88 insertions(+), 819 deletions(-) delete mode 100644 emplode/get_hf_llm.py diff --git a/README.md b/README.md index 12d18d3..bb8dfcc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@
-**Emplode** Agent performs actions on your system by executing code locally, It can also serve as an agentic framework for your disposable sandbox projects. You can chat with Emplode in your terminal by running `emplode` after installing. +**Emplode** performs actions on your system by executing code locally. You can chat with Emplode in your terminal by running `emplode` after installing. This provides a natural-language interface to your system's general-purpose capabilities: @@ -46,35 +46,11 @@ emplode.chat() # Starts an interactive chat ## Commands -### Change the Model - -For `gpt-3.5-turbo`, use fast mode: - -```shell -emplode --fast -``` - -In Python, you will need to set the model manually: - -```python -emplode.model = "gpt-3.5-turbo" -``` - -### Running Emplode locally - -You can run `emplode` in local mode from the command line to use `Code Llama`: - -```shell -emplode --local -``` - -Or run any Hugging Face model **locally** by using its repo ID (e.g. "tiiuae/falcon-180B"): - -```shell -emplode --model nvidia/Llama-3.1-Nemotron-70B-Instruct -emplode --model meta-llama/Llama-3.2-11B-Vision-Instruct -``` +Emplode now uses a single model, `gpt-5`, everywhere. There is no model selection and no local model support. +- `-y`, `--yes`: execute code without user confirmation +- `-d`, `--debug`: prints extra information +- `--version`: display current Emplode version ### Configuration with .env @@ -84,15 +60,13 @@ Here's a sample .env configuration: ``` EMPLODE_CLI_AUTO_RUN=False -EMPLODE_CLI_FAST_MODE=False -EMPLODE_CLI_LOCAL_RUN=False EMPLODE_CLI_DEBUG=False ``` -You can modify these values in the .env file to change the default behavior of the Emplode +You can modify these values in the .env file to change the default behavior of Emplode. ## How Does it Work? -Emplode equips a [function-calling model](https://platform.openai.com/docs/guides/gpt/function-calling) with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run. +Emplode equips a function-calling model with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run.
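The README hunk above ends by describing Emplode's core mechanism: the model is given an `exec()`-style function that accepts a `language` and `code` to run. As a rough illustration only (not part of this patch), the sketch below shows the general shape of such a function-calling schema and dispatcher. The actual schema (`function_schema`) is defined in `emplode/emplode.py`, and the names used here (`run_code`, `exec_code`) are assumptions for illustration.

```python
# Illustrative sketch only -- not part of the patch. It mirrors the idea the
# README describes: a function-calling schema plus an exec-style dispatcher.
# Names ("run_code", "exec_code") are assumptions, not the repo's exact API.
import contextlib
import io

function_schema = {
    "name": "run_code",
    "description": "Executes code on the user's machine and returns the output.",
    "parameters": {
        "type": "object",
        "properties": {
            "language": {
                "type": "string",
                "description": "Programming language, e.g. 'python' or 'shell'.",
            },
            "code": {
                "type": "string",
                "description": "The code to execute.",
            },
        },
        "required": ["language", "code"],
    },
}

def exec_code(language: str, code: str) -> str:
    """Hypothetical dispatcher: run model-generated code and capture its output."""
    if language == "python":
        buffer = io.StringIO()
        with contextlib.redirect_stdout(buffer):
            exec(code, {})  # run the snippet in an empty namespace
        return buffer.getvalue() or "No output"
    raise ValueError(f"Unsupported language: {language}")

print(exec_code("python", "print(2 + 2)"))  # -> "4"
```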
diff --git a/emplode/cli.py b/emplode/cli.py index ad170d0..6b7c94a 100644 --- a/emplode/cli.py +++ b/emplode/cli.py @@ -6,7 +6,6 @@ import pkg_resources from rich import print as rprint from rich.markdown import Markdown -import inquirer load_dotenv() @@ -27,10 +26,7 @@ def cli(emplode): pass AUTO_RUN = os.getenv('EMPLODE_CLI_AUTO_RUN', 'False') == 'True' - FAST_MODE = os.getenv('EMPLODE_CLI_FAST_MODE', 'False') == 'True' - LOCAL_RUN = os.getenv('EMPLODE_CLI_LOCAL_RUN', 'False') == 'True' DEBUG = os.getenv('EMPLODE_CLI_DEBUG', 'False') == 'True' - USE_AZURE = os.getenv('EMPLODE_CLI_USE_AZURE', 'False') == 'True' parser = argparse.ArgumentParser(description='Command Emplode.') @@ -39,126 +35,26 @@ def cli(emplode): action='store_true', default=AUTO_RUN, help='execute code without user confirmation') - parser.add_argument('-f', - '--fast', - action='store_true', - default=FAST_MODE, - help='use gpt-4o-mini instead of gpt-4o') - parser.add_argument('-l', - '--local', - action='store_true', - default=LOCAL_RUN, - help='run fully local with code-llama') - parser.add_argument( - '--falcon', - action='store_true', - default=False, - help='run fully local with falcon-40b') parser.add_argument('-d', '--debug', action='store_true', default=DEBUG, help='prints extra information') - parser.add_argument('--model', - type=str, - help='model name (for OpenAI compatible APIs) or HuggingFace repo', - default="", - required=False) - - parser.add_argument('--max_tokens', - type=int, - help='max tokens generated (for locally run models)') - parser.add_argument('--context_window', - type=int, - help='context window in tokens (for locally run models)') - - parser.add_argument('--api_base', - type=str, - help='change your api_base to any OpenAI compatible api', - default="", - required=False) - - parser.add_argument('--use-azure', - action='store_true', - default=USE_AZURE, - help='use Azure OpenAI Services') - parser.add_argument('--version', action='store_true', help='display current Emplode version') args = parser.parse_args() - if args.version: print("Emplode", pkg_resources.get_distribution("emplode").version) return - if args.max_tokens: - emplode.max_tokens = args.max_tokens - if args.context_window: - emplode.context_window = args.context_window - if args.yes: emplode.auto_run = True - if args.fast: - emplode.model = "gpt-4o-mini" - if args.local and not args.falcon: - - rprint('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') - - models = { - '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', - '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', - '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' - } - - parameter_choices = list(models.keys()) - questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] - answers = inquirer.prompt(questions) - chosen_param = answers['param'] - emplode.model = models[chosen_param] - emplode.local = True - - if args.debug: emplode.debug_mode = True - if args.use_azure: - emplode.use_azure = True - emplode.local = False - - - if args.model != "": - emplode.model = args.model - - if "/" in emplode.model: - emplode.local = True - - if args.api_base: - emplode.api_base = args.api_base - - if args.falcon or args.model == "tiiuae/falcon-180B": - - rprint('', Markdown("**Emplode** will use `Falcon` for local execution."), '') - - models = { - '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', - '40B': 'YokaiKoibito/falcon-40b-GGUF', - '180B': 'TheBloke/Falcon-180B-Chat-GGUF' - } - - 
parameter_choices = list(models.keys()) - questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] - answers = inquirer.prompt(questions) - chosen_param = answers['param'] - - if chosen_param == "180B": - rprint(Markdown("> **WARNING:** To run `Falcon-180B` we recommend at least `100GB` of RAM.")) - - emplode.model = models[chosen_param] - emplode.local = True - emplode.chat() diff --git a/emplode/emplode.py b/emplode/emplode.py index f30176c..3daab29 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -3,17 +3,13 @@ from .message_block import MessageBlock from .code_block import CodeBlock from .code_emplode import CodeEmplode -from .get_hf_llm import get_hf_llm import os import time import traceback import json import platform -import openai -import litellm -import pkg_resources - +from openai import OpenAI import getpass import requests import readline @@ -44,19 +40,7 @@ }, } -missing_api_key_message = """> OpenAI API key not found - -To use `GPT-4o` (recommended) please provide an OpenAI API key. - -To use `Code-Llama` (free but less capable) press `enter`. -""" - -missing_azure_info_message = """> Azure OpenAI Service API info not found - -To use `GPT-4` (recommended) please provide an Azure OpenAI API key, a API base, a deployment name and a API version. - -To use `Code-Llama` (free but less capable) press `enter`. -""" +missing_api_key_message = "> OpenAI API key not found\n\nTo use `GPT-5` please provide an OpenAI API key.\n" confirm_mode_message = """ **Emplode** will require approval before running code. Use `emplode -y` to bypass this. @@ -72,17 +56,10 @@ def __init__(self): self.temperature = 0.001 self.api_key = None self.auto_run = False - self.local = False - self.model = "gpt-4o" + self.model = "gpt-5" self.debug_mode = False - self.api_base = None - self.context_window = 2000 + self.context_window = 200000 self.max_tokens = 750 - self.use_azure = False - self.azure_api_base = None - self.azure_api_version = None - self.azure_deployment_name = None - self.azure_api_type = "azure" here = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(here, 'system_message.txt'), 'r') as f: self.system_message = f.read().strip() @@ -91,7 +68,7 @@ def __init__(self): self.active_block = None - self.llama_instance = None + self.client = None def cli(self): cli(self) @@ -106,38 +83,33 @@ def get_info_for_system_message(self): info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}" - if not self.local: - - query = [] - for message in self.messages[-2:]: - message_for_semantic_search = {"role": message["role"]} - if "content" in message: - message_for_semantic_search["content"] = message["content"] - if "function_call" in message and "parsed_arguments" in message["function_call"]: - message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"] - query.append(message_for_semantic_search) + query = [] + for message in self.messages[-2:]: + message_for_semantic_search = {"role": message.get("role", "assistant")} + if "content" in message: + message_for_semantic_search["content"] = message["content"] + if "function_call" in message and "parsed_arguments" in message["function_call"]: + message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"] + query.append(message_for_semantic_search) - url = "https://open-procedures.replit.app/search/" + url = "https://open-procedures.replit.app/search/" - 
try: - relevant_procedures = requests.get(url, data=json.dumps(query)).json()["procedures"] + try: + relevant_procedures = requests.get(url, data=json.dumps(query)).json().get("procedures", []) + if relevant_procedures: info += "\n\n# Recommended Procedures\n" + "\n---\n".join(relevant_procedures) + "\nIn your plan, include steps and, if present, **EXACT CODE SNIPPETS** (especially for depracation notices, **WRITE THEM INTO YOUR PLAN -- underneath each numbered step** as they will VANISH once you execute your first line of code, so WRITE THEM DOWN NOW if you need them) from the above procedures if they are relevant to the task. Again, include **VERBATIM CODE SNIPPETS** from the procedures above if they are relevent to the task **directly in your plan.**" - except: - pass + except: + pass - elif self.local: - info += "\n\nTo run code, write a fenced code block (i.e ```python, R or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output." return info def reset(self): - self.messages = [] self.code_emplodes = {} def load(self, messages): self.messages = messages - def handle_undo(self, arguments): if len(self.messages) == 0: @@ -159,7 +131,7 @@ def handle_undo(self, arguments): if 'content' in message and message['content'] != None: print(Markdown(f"**Removed message:** `\"{message['content'][:30]}...\"`")) elif 'function_call' in message: - print(Markdown(f"**Removed codeblock**")) # TODO: Could add preview of code removed here. + print(Markdown(f"**Removed codeblock**")) print("") def handle_help(self, arguments): @@ -246,48 +218,17 @@ def handle_command(self, user_input): def chat(self, message=None, return_messages=False): - if not self.local: - self.verify_api_key() - - if self.local: - - if self.llama_instance == None: - try: - self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window) - if self.llama_instance == None: - return - except: - traceback.print_exc() - - print(Markdown("".join([ - f"> Failed to install `{self.model}`.", - f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/emplodeai/emplode/\n```", - f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**", - "\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Emplode community Discord and consider contributing the project's development. )", - "\n\nPress enter to switch to `GPT-4o` (recommended)." 
- ]))) - input() - - self.local = False - self.model = "gpt-4o" - self.verify_api_key() + self.verify_api_key() welcome_message = "" if self.debug_mode: welcome_message += "> Entered debug mode" - if not self.local and not self.auto_run: - - if self.use_azure: - notice_model = f"{self.azure_deployment_name} (Azure)" - else: - notice_model = f"{self.model.upper()}" - welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To run locally, use `emplode --local`" - - if self.local: - welcome_message += f"\n> Model set to `{self.model}`" - + if not self.auto_run: + notice_model = f"{self.model.upper()}" + welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To auto-run code, use `emplode -y`" + if not self.auto_run: welcome_message += "\n\n" + confirm_mode_message @@ -326,132 +267,34 @@ def chat(self, message=None, return_messages=False): except KeyboardInterrupt: pass finally: - self.end_active_block() if return_messages: return self.messages def verify_api_key(self): - if self.use_azure: - all_env_available = ( - ('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and - 'AZURE_API_BASE' in os.environ and - 'AZURE_API_VERSION' in os.environ and - 'AZURE_DEPLOYMENT_NAME' in os.environ) - if all_env_available: - self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY'] - self.azure_api_base = os.environ['AZURE_API_BASE'] - self.azure_api_version = os.environ['AZURE_API_VERSION'] - self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME'] - self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure') + if self.api_key is None: + if 'OPENAI_API_KEY' in os.environ: + self.api_key = os.environ['OPENAI_API_KEY'] else: self._print_welcome_message() time.sleep(1) print(Rule(style="white")) - print(Markdown(missing_azure_info_message), '', Rule(style="white"), '') - response = input("Azure OpenAI API key: ") + print(Markdown(missing_api_key_message), '', Rule(style="white"), '') + response = input("OpenAI API key: ") if response == "": - - print(Markdown( - "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."), - '') - time.sleep(2) - print(Rule(style="white")) - - import inquirer - - print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') - - models = { - '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', - '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', - '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' - } - - parameter_choices = list(models.keys()) - questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] - answers = inquirer.prompt(questions) - chosen_param = answers['param'] - - self.model = models[chosen_param] - self.local = True - - - - - return - + raise Exception("OpenAI API key is required to use Emplode with GPT-5.") else: self.api_key = response - self.azure_api_base = input("Azure OpenAI API base: ") - self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ") - self.azure_api_version = input("Azure OpenAI API version: ") - print('', Markdown( - "**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."), - '') + print('', Markdown("**Tip:** To save this key for 
later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '') time.sleep(2) print(Rule(style="white")) - litellm.api_type = self.azure_api_type - litellm.api_base = self.azure_api_base - litellm.api_version = self.azure_api_version - litellm.api_key = self.api_key - else: - if self.api_key == None: - if 'OPENAI_API_KEY' in os.environ: - self.api_key = os.environ['OPENAI_API_KEY'] - else: - self._print_welcome_message() - time.sleep(1) - - print(Rule(style="white")) - - print(Markdown(missing_api_key_message), '', Rule(style="white"), '') - response = input("OpenAI API key: ") - - if response == "": - - print(Markdown( - "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."), - '') - time.sleep(2) - print(Rule(style="white")) - - import inquirer - - print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') - - models = { - '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', - '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', - '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' - } - - parameter_choices = list(models.keys()) - questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] - answers = inquirer.prompt(questions) - chosen_param = answers['param'] - self.model = models[chosen_param] - self.local = True - - - - - return - - else: - self.api_key = response - print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '') - time.sleep(2) - print(Rule(style="white")) - - litellm.api_key = self.api_key - if self.api_base: - litellm.api_base = self.api_base + if self.client is None: + self.client = OpenAI(api_key=self.api_key) def end_active_block(self): if self.active_block: @@ -461,149 +304,51 @@ def end_active_block(self): def respond(self): info = self.get_info_for_system_message() - if self.local: - self.system_message = "\n".join(self.system_message.split("\n")[:2]) - self.system_message += "\nOnly do what the user asks you to do, then ask what they'd like to do next." 
- system_message = self.system_message + "\n\n" + info - if self.local: - messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) - else: - messages = tt.trim(self.messages, self.model, system_message=system_message) + messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) if self.debug_mode: print("\n", "Sending `messages` to LLM:", "\n") print(messages) print() - if not self.local: - - error = "" - - for _ in range(3): - try: - - if self.use_azure: - response = litellm.completion( - f"azure/{self.azure_deployment_name}", - messages=messages, - functions=[function_schema], - temperature=self.temperature, - stream=True, - ) - else: - if self.api_base: - response = litellm.completion( - api_base=self.api_base, - model = "custom/" + self.model, - messages=messages, - functions=[function_schema], - stream=True, - temperature=self.temperature, - ) - else: - response = litellm.completion( - model=self.model, - messages=messages, - functions=[function_schema], - stream=True, - temperature=self.temperature, - ) - - break - except: - if self.debug_mode: - traceback.print_exc() - error = traceback.format_exc() - time.sleep(3) - else: - raise Exception(error) - - elif self.local: - - def messages_to_prompt(messages): - + error = "" - for message in messages: - if "role" not in message: - message["role"] = "assistant" - - if "falcon" in self.model.lower(): - - formatted_messages = "" - for message in messages: - formatted_messages += f"{message['role'].capitalize()}: {message['content']}\n" - formatted_messages = formatted_messages.strip() - - else: - - system_prompt = messages[0]['content'] - formatted_messages = f"[INST] <>\n{system_prompt}\n<>\n" - - for index, item in enumerate(messages[1:]): - role = item['role'] - content = item['content'] - - if role == 'user': - formatted_messages += f"{content} [/INST] " - elif role == 'function': - formatted_messages += f"Output: {content} [/INST] " - elif role == 'assistant': - formatted_messages += f"{content} [INST] " - - if formatted_messages.endswith("[INST] "): - formatted_messages = formatted_messages[:-10] - - return formatted_messages - - prompt = messages_to_prompt(messages) - if messages[-1]["role"] != "function": - prompt += "Let's explore this. By the way, I can run code on your machine by writing the code in a markdown code block. This works for shell, javascript, python, R, and applescript. I'm going to try to do this for your task. 
Anyway, " - elif messages[-1]["role"] == "function" and messages[-1]["content"] != "No output": - prompt += "Given the output of the code I just ran, " - elif messages[-1]["role"] == "function" and messages[-1]["content"] == "No output": - prompt += "Given the fact that the code I just ran produced no output, " - - - if self.debug_mode: - import builtins - builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt) - - response = self.llama_instance( - prompt, - stream=True, - temperature=self.temperature, - stop=[""], - max_tokens=750 - ) + for _ in range(3): + try: + response = self.client.chat.completions.create( + model=self.model, + messages=messages, + functions=[function_schema], + temperature=self.temperature, + stream=True, + ) + break + except: + if self.debug_mode: + traceback.print_exc() + error = traceback.format_exc() + time.sleep(3) + else: + raise Exception(error) self.messages.append({}) in_function_call = False - llama_function_call_finished = False self.active_block = None for chunk in response: - if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0): - continue - - if self.local: - if "content" not in messages[-1]: - chunk["choices"][0]["text"] = chunk["choices"][0]["text"].capitalize() - messages[-1]["role"] = "assistant" - delta = {"content": chunk["choices"][0]["text"]} - else: - delta = chunk["choices"][0]["delta"] + try: + chunk_dict = chunk.model_dump() + except Exception: + chunk_dict = chunk + + delta = chunk_dict.get("choices", [{}])[0].get("delta", {}) + finish_reason = chunk_dict.get("choices", [{}])[0].get("finish_reason") self.messages[-1] = merge_deltas(self.messages[-1], delta) - if not self.local: - condition = "function_call" in self.messages[-1] - elif self.local: - if "content" in self.messages[-1]: - condition = self.messages[-1]["content"].count("```") % 2 == 1 - else: - condition = False + condition = "function_call" in self.messages[-1] if condition: if in_function_call == False: @@ -618,68 +363,24 @@ def messages_to_prompt(messages): in_function_call = True - if not self.local: - if "arguments" in self.messages[-1]["function_call"]: - arguments = self.messages[-1]["function_call"]["arguments"] - new_parsed_arguments = parse_partial_json(arguments) - if new_parsed_arguments: - self.messages[-1]["function_call"][ - "parsed_arguments"] = new_parsed_arguments - - elif self.local: - if "content" in self.messages[-1]: - - content = self.messages[-1]["content"] - - if "```" in content: - blocks = content.split("```") - - current_code_block = blocks[-1] - - lines = current_code_block.split("\n") - - if content.strip() == "```": - language = None - else: - if lines[0] != "": - language = lines[0].strip() - else: - language = "python" - if len(lines) > 1: - if lines[1].startswith("pip"): - language = "shell" - - code = '\n'.join(lines[1:]).strip("` \n") - - arguments = {"code": code} - if language: - if language == "bash": - language = "shell" - arguments["language"] = language - - if "function_call" not in self.messages[-1]: - self.messages[-1]["function_call"] = {} - - self.messages[-1]["function_call"]["parsed_arguments"] = arguments + if "arguments" in self.messages[-1]["function_call"]: + arguments = self.messages[-1]["function_call"]["arguments"] + new_parsed_arguments = parse_partial_json(arguments) + if new_parsed_arguments: + self.messages[-1]["function_call"][ + "parsed_arguments"] = new_parsed_arguments else: if in_function_call == True: - - if self.local: - - llama_function_call_finished = True - - in_function_call = False + 
in_function_call = False if self.active_block == None: - self.active_block = MessageBlock() self.active_block.update_from_message(self.messages[-1]) - if chunk["choices"][0]["finish_reason"] or llama_function_call_finished: - if chunk["choices"][ - 0]["finish_reason"] == "function_call" or llama_function_call_finished: + if finish_reason: + if finish_reason == "function_call": if self.debug_mode: print("Running function:") @@ -712,7 +413,7 @@ def messages_to_prompt(messages): }) return - if not self.local and "parsed_arguments" not in self.messages[-1]["function_call"]: + if "parsed_arguments" not in self.messages[-1]["function_call"]: self.messages.append({ "role": "function", @@ -742,9 +443,8 @@ def messages_to_prompt(messages): self.respond() - if chunk["choices"][0]["finish_reason"] != "function_call": - - if self.local and "content" in self.messages[-1]: + else: + if "content" in self.messages[-1]: self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#") self.active_block.update_from_message(self.messages[-1]) time.sleep(0.1) diff --git a/emplode/get_hf_llm.py b/emplode/get_hf_llm.py deleted file mode 100644 index a93b02e..0000000 --- a/emplode/get_hf_llm.py +++ /dev/null @@ -1,291 +0,0 @@ -import os -import sys -import appdirs -import traceback -import inquirer -import subprocess -from rich import print -from rich.markdown import Markdown -import os -import shutil -from huggingface_hub import list_files_info, hf_hub_download - - -def get_hf_llm(repo_id, debug_mode, context_window): - - if "TheBloke/CodeLlama-" not in repo_id: - print('', Markdown(f"**Emplode** will use `{repo_id}` for local execution."), '') - - raw_models = list_gguf_files(repo_id) - - if not raw_models: - print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?") - return None - - combined_models = group_and_combine_splits(raw_models) - - selected_model = None - - if len(combined_models) > 3: - - choices = [ - format_quality_choice(combined_models[0], "Small"), - format_quality_choice(combined_models[len(combined_models) // 2], "Medium"), - format_quality_choice(combined_models[-1], "Large"), - "See More" - ] - questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)] - answers = inquirer.prompt(questions) - if answers["selected_model"].startswith("Small"): - selected_model = combined_models[0]["filename"] - elif answers["selected_model"].startswith("Medium"): - selected_model = combined_models[len(combined_models) // 2]["filename"] - elif answers["selected_model"].startswith("Large"): - selected_model = combined_models[-1]["filename"] - - if selected_model == None: - - choices = [format_quality_choice(model) for model in combined_models] - questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)] - answers = inquirer.prompt(questions) - for model in combined_models: - if format_quality_choice(model) == answers["selected_model"]: - selected_model = model["filename"] - break - - if confirm_action("Use GPU? 
(Large models might crash on GPU, but will run more quickly)"): - n_gpu_layers = -1 - else: - n_gpu_layers = 0 - - user_data_dir = appdirs.user_data_dir("Emplode") - default_path = os.path.join(user_data_dir, "models") - - os.makedirs(default_path, exist_ok=True) - - directories_to_check = [ - default_path, - "llama.cpp/models/", - os.path.expanduser("~") + "/llama.cpp/models/", - "/" - ] - - for directory in directories_to_check: - path = os.path.join(directory, selected_model) - if os.path.exists(path): - model_path = path - break - else: - download_path = os.path.join(default_path, selected_model) - - print(f"This language model was not found on your system.\n\nDownload to `{default_path}`?", "") - if confirm_action(""): - for model_details in combined_models: - if model_details["filename"] == selected_model: - selected_model_details = model_details - - if not enough_disk_space(selected_model_details['Size'], default_path): - print(f"You do not have enough disk space available to download this model.") - return None - - split_files = [model["filename"] for model in raw_models if selected_model in model["filename"]] - - if len(split_files) > 1: - for split_file in split_files: - split_path = os.path.join(default_path, split_file) - if os.path.exists(split_path): - if not confirm_action(f"Split file {split_path} already exists. Download again?"): - continue - hf_hub_download( - repo_id=repo_id, - filename=split_file, - local_dir=default_path, - local_dir_use_symlinks=False, - resume_download=True) - - actually_combine_files(default_path, selected_model, split_files) - else: - hf_hub_download( - repo_id=repo_id, - filename=selected_model, - local_dir=default_path, - local_dir_use_symlinks=False, - resume_download=True) - - model_path = download_path - - else: - print('\n', "Download cancelled. Exiting.", '\n') - return None - - print(Markdown(f"Model found at `{model_path}`")) - - try: - from llama_cpp import Llama - except: - if debug_mode: - traceback.print_exc() - message = "Local LLM interface package not found. Install `llama-cpp-python`?" 
- if confirm_action(message): - - import platform - - def check_command(command): - try: - subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - return True - except subprocess.CalledProcessError: - return False - except FileNotFoundError: - return False - - def install_llama(backend): - env_vars = { - "FORCE_CMAKE": "1" - } - - if backend == "cuBLAS": - env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on" - elif backend == "hipBLAS": - env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on" - elif backend == "Metal": - env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on" - else: - env_vars["CMAKE_ARGS"] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" - - try: - subprocess.run([sys.executable, "-m", "pip", "install", "llama-cpp-python"], env={**os.environ, **env_vars}, check=True) - except subprocess.CalledProcessError as e: - print(f"Error during installation with {backend}: {e}") - - def supports_metal(): - if platform.system() == "Darwin": - mac_version = tuple(map(int, platform.mac_ver()[0].split('.'))) - if mac_version >= (10, 11): - return True - return False - - if check_command(["nvidia-smi"]): - install_llama("cuBLAS") - elif check_command(["rocminfo"]): - install_llama("hipBLAS") - elif supports_metal(): - install_llama("Metal") - else: - install_llama("OpenBLAS") - - from llama_cpp import Llama - print('', Markdown("Finished downloading `Code-Llama` interface."), '') - - if platform.system() == "Darwin": - if platform.machine() != "arm64": - print("Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture.") - print("The llama.ccp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac.") - print("\nTo install the correct version of Python that supports 'arm64' architecture:") - print("1. Download Miniforge for M1/M2:") - print("wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh") - print("2. Install it:") - print("bash Miniforge3-MacOSX-arm64.sh") - print("") - - else: - print('', "Installation cancelled. 
Exiting.", '') - return None - - assert os.path.isfile(model_path) - llama_2 = Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window) - - return llama_2 - -def confirm_action(message): - question = [ - inquirer.Confirm('confirm', - message=message, - default=True), - ] - - answers = inquirer.prompt(question) - return answers['confirm'] - - -import os -import inquirer -from huggingface_hub import list_files_info, hf_hub_download, login -from typing import Dict, List, Union - -def list_gguf_files(repo_id: str) -> List[Dict[str, Union[str, float]]]: - try: - files_info = list_files_info(repo_id=repo_id) - except Exception as e: - if "authentication" in str(e).lower(): - print("You likely need to be logged in to HuggingFace to access this language model.") - print(f"Visit this URL to log in and apply for access to this language model: https://huggingface.co/{repo_id}") - print("Then, log in here:") - login() - files_info = list_files_info(repo_id=repo_id) - - gguf_files = [file for file in files_info if "gguf" in file.rfilename] - - gguf_files = sorted(gguf_files, key=lambda x: x.size) - - result = [] - for file in gguf_files: - size_in_gb = file.size / (1024**3) - filename = file.rfilename - result.append({ - "filename": filename, - "Size": size_in_gb, - "RAM": size_in_gb + 2.5, - }) - - return result - -from typing import List, Dict, Union - -def group_and_combine_splits(models: List[Dict[str, Union[str, float]]]) -> List[Dict[str, Union[str, float]]]: - grouped_files = {} - - for model in models: - base_name = model["filename"].split('-split-')[0] - - if base_name in grouped_files: - grouped_files[base_name]["Size"] += model["Size"] - grouped_files[base_name]["RAM"] += model["RAM"] - grouped_files[base_name]["SPLITS"].append(model["filename"]) - else: - grouped_files[base_name] = { - "filename": base_name, - "Size": model["Size"], - "RAM": model["RAM"], - "SPLITS": [model["filename"]] - } - - return list(grouped_files.values()) - - -def actually_combine_files(default_path: str, base_name: str, files: List[str]) -> None: - files.sort() - base_path = os.path.join(default_path, base_name) - with open(base_path, 'wb') as outfile: - for file in files: - file_path = os.path.join(default_path, file) - with open(file_path, 'rb') as infile: - outfile.write(infile.read()) - os.remove(file_path) - -def format_quality_choice(model, name_override = None) -> str: - if name_override: - name = name_override - else: - name = model['filename'] - return f"{name} | Size: {model['Size']:.1f} GB, Estimated RAM usage: {model['RAM']:.1f} GB" - -def enough_disk_space(size, path) -> bool: - _, _, free = shutil.disk_usage(path) - - free_gb = free / (2**30) - - if free_gb > size: - return True - - return False diff --git a/pyproject.toml b/pyproject.toml index 7203053..1dfcc8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,26 +10,16 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" -openai = "^0.27.8" -rich = "^13.4.2" -tiktoken = "^0.4.0" -astor = "^0.8.1" -git-python = "^1.0.3" -tokentrim = "^0.1.9" -appdirs = "^1.4.4" -six = "^1.16.0" -python-dotenv = "^1.0.0" - -inquirer = "^3.1.3" -wget = "^3.2" -huggingface-hub = "^0.16.4" -litellm = "^0.1.590" -[tool.poetry.dependencies.pyreadline3] -version = "^3.4.1" -markers = "sys_platform == 'win32'" +openai = "^1.0.0" +rich = "*" +tiktoken = "*" +tokentrim = "*" +python-dotenv = "*" +requests = "*" +packaging = "*" [tool.poetry.group.dev.dependencies] -pytest = "^7.4.0" +pytest = "*" [build-system] 
requires = ["poetry-core>=1.0.0"] From 627c697f65394b605d1b2f35ef052dca589ebb3e Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 07:26:02 +0000 Subject: [PATCH 2/9] Capy jam: Strip to core (-y only), remove external prompts/links, and bump deps to latest (openai 1.106.1, rich 14.1.0, tiktoken 0.11.0, tokentrim 0.1.13) Co-authored-by: Capy --- ...dize-on-GPT-5-and-remove-LiteLLM-to-.patch | 1184 +++++++++++++++++ .capy/pr-body-gpt5-only.md | 31 + README.md | 64 +- emplode/cli.py | 53 +- emplode/emplode.py | 27 +- pyproject.toml | 11 +- 6 files changed, 1232 insertions(+), 138 deletions(-) create mode 100644 .capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch create mode 100644 .capy/pr-body-gpt5-only.md diff --git a/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch b/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch new file mode 100644 index 0000000..ebfa187 --- /dev/null +++ b/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch @@ -0,0 +1,1184 @@ +From 183cb2bb5514b81eced89287006a47d8036a06fc Mon Sep 17 00:00:00 2001 +From: shouryamaanjain +Date: Mon, 8 Sep 2025 07:09:53 +0000 +Subject: [PATCH] Capy jam: Standardize on GPT-5 and remove LiteLLM to simplify + model usage and future-proof the agent; update dependencies and docs + accordingly + +Co-authored-by: Capy +--- + README.md | 40 +--- + emplode/cli.py | 104 ---------- + emplode/emplode.py | 446 +++++++----------------------------------- + emplode/get_hf_llm.py | 291 --------------------------- + pyproject.toml | 26 +-- + 5 files changed, 88 insertions(+), 819 deletions(-) + delete mode 100644 emplode/get_hf_llm.py + +diff --git a/README.md b/README.md +index 12d18d3..bb8dfcc 100644 +--- a/README.md ++++ b/README.md +@@ -10,7 +10,7 @@ + +
+ +-**Emplode** Agent performs actions on your system by executing code locally, It can also serve as an agentic framework for your disposable sandbox projects. You can chat with Emplode in your terminal by running `emplode` after installing. ++**Emplode** performs actions on your system by executing code locally. You can chat with Emplode in your terminal by running `emplode` after installing. + + This provides a natural-language interface to your system's general-purpose capabilities: + +@@ -46,35 +46,11 @@ emplode.chat() # Starts an interactive chat + + ## Commands + +-### Change the Model +- +-For `gpt-3.5-turbo`, use fast mode: +- +-```shell +-emplode --fast +-``` +- +-In Python, you will need to set the model manually: +- +-```python +-emplode.model = "gpt-3.5-turbo" +-``` +- +-### Running Emplode locally +- +-You can run `emplode` in local mode from the command line to use `Code Llama`: +- +-```shell +-emplode --local +-``` +- +-Or run any Hugging Face model **locally** by using its repo ID (e.g. "tiiuae/falcon-180B"): +- +-```shell +-emplode --model nvidia/Llama-3.1-Nemotron-70B-Instruct +-emplode --model meta-llama/Llama-3.2-11B-Vision-Instruct +-``` ++Emplode now uses a single model, `gpt-5`, everywhere. There is no model selection and no local model support. + ++- `-y`, `--yes`: execute code without user confirmation ++- `-d`, `--debug`: prints extra information ++- `--version`: display current Emplode version + + ### Configuration with .env + +@@ -84,15 +60,13 @@ Here's a sample .env configuration: + + ``` + EMPLODE_CLI_AUTO_RUN=False +-EMPLODE_CLI_FAST_MODE=False +-EMPLODE_CLI_LOCAL_RUN=False + EMPLODE_CLI_DEBUG=False + ``` + +-You can modify these values in the .env file to change the default behavior of the Emplode ++You can modify these values in the .env file to change the default behavior of Emplode. + + ## How Does it Work? + +-Emplode equips a [function-calling model](https://platform.openai.com/docs/guides/gpt/function-calling) with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run. ++Emplode equips a function-calling model with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run. + +
+diff --git a/emplode/cli.py b/emplode/cli.py +index ad170d0..6b7c94a 100644 +--- a/emplode/cli.py ++++ b/emplode/cli.py +@@ -6,7 +6,6 @@ from packaging import version + import pkg_resources + from rich import print as rprint + from rich.markdown import Markdown +-import inquirer + + load_dotenv() + +@@ -27,10 +26,7 @@ def cli(emplode): + pass + + AUTO_RUN = os.getenv('EMPLODE_CLI_AUTO_RUN', 'False') == 'True' +- FAST_MODE = os.getenv('EMPLODE_CLI_FAST_MODE', 'False') == 'True' +- LOCAL_RUN = os.getenv('EMPLODE_CLI_LOCAL_RUN', 'False') == 'True' + DEBUG = os.getenv('EMPLODE_CLI_DEBUG', 'False') == 'True' +- USE_AZURE = os.getenv('EMPLODE_CLI_USE_AZURE', 'False') == 'True' + + parser = argparse.ArgumentParser(description='Command Emplode.') + +@@ -39,126 +35,26 @@ def cli(emplode): + action='store_true', + default=AUTO_RUN, + help='execute code without user confirmation') +- parser.add_argument('-f', +- '--fast', +- action='store_true', +- default=FAST_MODE, +- help='use gpt-4o-mini instead of gpt-4o') +- parser.add_argument('-l', +- '--local', +- action='store_true', +- default=LOCAL_RUN, +- help='run fully local with code-llama') +- parser.add_argument( +- '--falcon', +- action='store_true', +- default=False, +- help='run fully local with falcon-40b') + parser.add_argument('-d', + '--debug', + action='store_true', + default=DEBUG, + help='prints extra information') + +- parser.add_argument('--model', +- type=str, +- help='model name (for OpenAI compatible APIs) or HuggingFace repo', +- default="", +- required=False) +- +- parser.add_argument('--max_tokens', +- type=int, +- help='max tokens generated (for locally run models)') +- parser.add_argument('--context_window', +- type=int, +- help='context window in tokens (for locally run models)') +- +- parser.add_argument('--api_base', +- type=str, +- help='change your api_base to any OpenAI compatible api', +- default="", +- required=False) +- +- parser.add_argument('--use-azure', +- action='store_true', +- default=USE_AZURE, +- help='use Azure OpenAI Services') +- + parser.add_argument('--version', + action='store_true', + help='display current Emplode version') + + args = parser.parse_args() + +- + if args.version: + print("Emplode", pkg_resources.get_distribution("emplode").version) + return + +- if args.max_tokens: +- emplode.max_tokens = args.max_tokens +- if args.context_window: +- emplode.context_window = args.context_window +- + if args.yes: + emplode.auto_run = True +- if args.fast: +- emplode.model = "gpt-4o-mini" +- if args.local and not args.falcon: +- +- rprint('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') +- +- models = { +- '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', +- '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', +- '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' +- } +- +- parameter_choices = list(models.keys()) +- questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] +- answers = inquirer.prompt(questions) +- chosen_param = answers['param'] + +- emplode.model = models[chosen_param] +- emplode.local = True +- +- + if args.debug: + emplode.debug_mode = True +- if args.use_azure: +- emplode.use_azure = True +- emplode.local = False +- +- +- if args.model != "": +- emplode.model = args.model +- +- if "/" in emplode.model: +- emplode.local = True +- +- if args.api_base: +- emplode.api_base = args.api_base +- +- if args.falcon or args.model == "tiiuae/falcon-180B": +- +- rprint('', Markdown("**Emplode** will use `Falcon` 
for local execution."), '') +- +- models = { +- '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', +- '40B': 'YokaiKoibito/falcon-40b-GGUF', +- '180B': 'TheBloke/Falcon-180B-Chat-GGUF' +- } +- +- parameter_choices = list(models.keys()) +- questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] +- answers = inquirer.prompt(questions) +- chosen_param = answers['param'] +- +- if chosen_param == "180B": +- rprint(Markdown("> **WARNING:** To run `Falcon-180B` we recommend at least `100GB` of RAM.")) +- +- emplode.model = models[chosen_param] +- emplode.local = True +- + + emplode.chat() +diff --git a/emplode/emplode.py b/emplode/emplode.py +index f30176c..3daab29 100644 +--- a/emplode/emplode.py ++++ b/emplode/emplode.py +@@ -3,17 +3,13 @@ from .utils import merge_deltas, parse_partial_json + from .message_block import MessageBlock + from .code_block import CodeBlock + from .code_emplode import CodeEmplode +-from .get_hf_llm import get_hf_llm + + import os + import time + import traceback + import json + import platform +-import openai +-import litellm +-import pkg_resources +- ++from openai import OpenAI + import getpass + import requests + import readline +@@ -44,19 +40,7 @@ function_schema = { + }, + } + +-missing_api_key_message = """> OpenAI API key not found +- +-To use `GPT-4o` (recommended) please provide an OpenAI API key. +- +-To use `Code-Llama` (free but less capable) press `enter`. +-""" +- +-missing_azure_info_message = """> Azure OpenAI Service API info not found +- +-To use `GPT-4` (recommended) please provide an Azure OpenAI API key, a API base, a deployment name and a API version. +- +-To use `Code-Llama` (free but less capable) press `enter`. +-""" ++missing_api_key_message = "> OpenAI API key not found\n\nTo use `GPT-5` please provide an OpenAI API key.\n" + + confirm_mode_message = """ + **Emplode** will require approval before running code. Use `emplode -y` to bypass this. 
+@@ -72,17 +56,10 @@ class Emplode: + self.temperature = 0.001 + self.api_key = None + self.auto_run = False +- self.local = False +- self.model = "gpt-4o" ++ self.model = "gpt-5" + self.debug_mode = False +- self.api_base = None +- self.context_window = 2000 ++ self.context_window = 200000 + self.max_tokens = 750 +- self.use_azure = False +- self.azure_api_base = None +- self.azure_api_version = None +- self.azure_deployment_name = None +- self.azure_api_type = "azure" + here = os.path.abspath(os.path.dirname(__file__)) + with open(os.path.join(here, 'system_message.txt'), 'r') as f: + self.system_message = f.read().strip() +@@ -91,7 +68,7 @@ class Emplode: + + self.active_block = None + +- self.llama_instance = None ++ self.client = None + + def cli(self): + cli(self) +@@ -106,38 +83,33 @@ class Emplode: + + info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}" + +- if not self.local: +- +- query = [] +- for message in self.messages[-2:]: +- message_for_semantic_search = {"role": message["role"]} +- if "content" in message: +- message_for_semantic_search["content"] = message["content"] +- if "function_call" in message and "parsed_arguments" in message["function_call"]: +- message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"] +- query.append(message_for_semantic_search) ++ query = [] ++ for message in self.messages[-2:]: ++ message_for_semantic_search = {"role": message.get("role", "assistant")} ++ if "content" in message: ++ message_for_semantic_search["content"] = message["content"] ++ if "function_call" in message and "parsed_arguments" in message["function_call"]: ++ message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"] ++ query.append(message_for_semantic_search) + +- url = "https://open-procedures.replit.app/search/" ++ url = "https://open-procedures.replit.app/search/" + +- try: +- relevant_procedures = requests.get(url, data=json.dumps(query)).json()["procedures"] ++ try: ++ relevant_procedures = requests.get(url, data=json.dumps(query)).json().get("procedures", []) ++ if relevant_procedures: + info += "\n\n# Recommended Procedures\n" + "\n---\n".join(relevant_procedures) + "\nIn your plan, include steps and, if present, **EXACT CODE SNIPPETS** (especially for depracation notices, **WRITE THEM INTO YOUR PLAN -- underneath each numbered step** as they will VANISH once you execute your first line of code, so WRITE THEM DOWN NOW if you need them) from the above procedures if they are relevant to the task. Again, include **VERBATIM CODE SNIPPETS** from the procedures above if they are relevent to the task **directly in your plan.**" +- except: +- pass ++ except: ++ pass + +- elif self.local: +- info += "\n\nTo run code, write a fenced code block (i.e ```python, R or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output." + return info + + def reset(self): +- + self.messages = [] + self.code_emplodes = {} + + def load(self, messages): + self.messages = messages + +- + def handle_undo(self, arguments): + + if len(self.messages) == 0: +@@ -159,7 +131,7 @@ class Emplode: + if 'content' in message and message['content'] != None: + print(Markdown(f"**Removed message:** `\"{message['content'][:30]}...\"`")) + elif 'function_call' in message: +- print(Markdown(f"**Removed codeblock**")) # TODO: Could add preview of code removed here. 
++ print(Markdown(f"**Removed codeblock**")) + + print("") + def handle_help(self, arguments): +@@ -246,48 +218,17 @@ class Emplode: + + def chat(self, message=None, return_messages=False): + +- if not self.local: +- self.verify_api_key() +- +- if self.local: +- +- if self.llama_instance == None: +- try: +- self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window) +- if self.llama_instance == None: +- return +- except: +- traceback.print_exc() +- +- print(Markdown("".join([ +- f"> Failed to install `{self.model}`.", +- f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/emplodeai/emplode/\n```", +- f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**", +- "\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Emplode community Discord and consider contributing the project's development. )", +- "\n\nPress enter to switch to `GPT-4o` (recommended)." +- ]))) +- input() +- +- self.local = False +- self.model = "gpt-4o" +- self.verify_api_key() ++ self.verify_api_key() + + welcome_message = "" + + if self.debug_mode: + welcome_message += "> Entered debug mode" + +- if not self.local and not self.auto_run: +- +- if self.use_azure: +- notice_model = f"{self.azure_deployment_name} (Azure)" +- else: +- notice_model = f"{self.model.upper()}" +- welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To run locally, use `emplode --local`" +- +- if self.local: +- welcome_message += f"\n> Model set to `{self.model}`" +- ++ if not self.auto_run: ++ notice_model = f"{self.model.upper()}" ++ welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To auto-run code, use `emplode -y`" ++ + if not self.auto_run: + welcome_message += "\n\n" + confirm_mode_message + +@@ -326,132 +267,34 @@ class Emplode: + except KeyboardInterrupt: + pass + finally: +- + self.end_active_block() + + if return_messages: + return self.messages + + def verify_api_key(self): +- if self.use_azure: +- all_env_available = ( +- ('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and +- 'AZURE_API_BASE' in os.environ and +- 'AZURE_API_VERSION' in os.environ and +- 'AZURE_DEPLOYMENT_NAME' in os.environ) +- if all_env_available: +- self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY'] +- self.azure_api_base = os.environ['AZURE_API_BASE'] +- self.azure_api_version = os.environ['AZURE_API_VERSION'] +- self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME'] +- self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure') ++ if self.api_key is None: ++ if 'OPENAI_API_KEY' in os.environ: ++ self.api_key = os.environ['OPENAI_API_KEY'] + else: + self._print_welcome_message() + time.sleep(1) + + print(Rule(style="white")) + +- print(Markdown(missing_azure_info_message), '', Rule(style="white"), '') +- response = input("Azure OpenAI API key: ") ++ print(Markdown(missing_api_key_message), '', Rule(style="white"), '') ++ response = input("OpenAI API key: ") + + if response == "": +- +- print(Markdown( +- "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."), +- '') +- time.sleep(2) +- print(Rule(style="white")) +- +- import inquirer +- +- print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') +- +- models 
= { +- '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', +- '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', +- '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' +- } +- +- parameter_choices = list(models.keys()) +- questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] +- answers = inquirer.prompt(questions) +- chosen_param = answers['param'] +- +- self.model = models[chosen_param] +- self.local = True +- +- +- +- +- return +- ++ raise Exception("OpenAI API key is required to use Emplode with GPT-5.") + else: + self.api_key = response +- self.azure_api_base = input("Azure OpenAI API base: ") +- self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ") +- self.azure_api_version = input("Azure OpenAI API version: ") +- print('', Markdown( +- "**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."), +- '') ++ print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '') + time.sleep(2) + print(Rule(style="white")) + +- litellm.api_type = self.azure_api_type +- litellm.api_base = self.azure_api_base +- litellm.api_version = self.azure_api_version +- litellm.api_key = self.api_key +- else: +- if self.api_key == None: +- if 'OPENAI_API_KEY' in os.environ: +- self.api_key = os.environ['OPENAI_API_KEY'] +- else: +- self._print_welcome_message() +- time.sleep(1) +- +- print(Rule(style="white")) +- +- print(Markdown(missing_api_key_message), '', Rule(style="white"), '') +- response = input("OpenAI API key: ") +- +- if response == "": +- +- print(Markdown( +- "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."), +- '') +- time.sleep(2) +- print(Rule(style="white")) +- +- import inquirer +- +- print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '') +- +- models = { +- '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF', +- '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF', +- '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF' +- } +- +- parameter_choices = list(models.keys()) +- questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)] +- answers = inquirer.prompt(questions) +- chosen_param = answers['param'] +- self.model = models[chosen_param] +- self.local = True +- +- +- +- +- return +- +- else: +- self.api_key = response +- print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '') +- time.sleep(2) +- print(Rule(style="white")) +- +- litellm.api_key = self.api_key +- if self.api_base: +- litellm.api_base = self.api_base ++ if self.client is None: ++ self.client = OpenAI(api_key=self.api_key) + + def end_active_block(self): + if self.active_block: +@@ -461,149 +304,51 @@ class Emplode: + def respond(self): + info = self.get_info_for_system_message() + +- if self.local: +- self.system_message = "\n".join(self.system_message.split("\n")[:2]) +- self.system_message += "\nOnly do what the user asks you to do, then ask what they'd like to do next." 
+- + system_message = self.system_message + "\n\n" + info + +- if self.local: +- messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) +- else: +- messages = tt.trim(self.messages, self.model, system_message=system_message) ++ messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) + + if self.debug_mode: + print("\n", "Sending `messages` to LLM:", "\n") + print(messages) + print() + +- if not self.local: +- +- error = "" +- +- for _ in range(3): +- try: +- +- if self.use_azure: +- response = litellm.completion( +- f"azure/{self.azure_deployment_name}", +- messages=messages, +- functions=[function_schema], +- temperature=self.temperature, +- stream=True, +- ) +- else: +- if self.api_base: +- response = litellm.completion( +- api_base=self.api_base, +- model = "custom/" + self.model, +- messages=messages, +- functions=[function_schema], +- stream=True, +- temperature=self.temperature, +- ) +- else: +- response = litellm.completion( +- model=self.model, +- messages=messages, +- functions=[function_schema], +- stream=True, +- temperature=self.temperature, +- ) +- +- break +- except: +- if self.debug_mode: +- traceback.print_exc() +- error = traceback.format_exc() +- time.sleep(3) +- else: +- raise Exception(error) +- +- elif self.local: +- +- def messages_to_prompt(messages): +- ++ error = "" + +- for message in messages: +- if "role" not in message: +- message["role"] = "assistant" +- +- if "falcon" in self.model.lower(): +- +- formatted_messages = "" +- for message in messages: +- formatted_messages += f"{message['role'].capitalize()}: {message['content']}\n" +- formatted_messages = formatted_messages.strip() +- +- else: +- +- system_prompt = messages[0]['content'] +- formatted_messages = f"[INST] <>\n{system_prompt}\n<>\n" +- +- for index, item in enumerate(messages[1:]): +- role = item['role'] +- content = item['content'] +- +- if role == 'user': +- formatted_messages += f"{content} [/INST] " +- elif role == 'function': +- formatted_messages += f"Output: {content} [/INST] " +- elif role == 'assistant': +- formatted_messages += f"{content} [INST] " +- +- if formatted_messages.endswith("[INST] "): +- formatted_messages = formatted_messages[:-10] +- +- return formatted_messages +- +- prompt = messages_to_prompt(messages) +- if messages[-1]["role"] != "function": +- prompt += "Let's explore this. By the way, I can run code on your machine by writing the code in a markdown code block. This works for shell, javascript, python, R, and applescript. I'm going to try to do this for your task. 
Anyway, " +- elif messages[-1]["role"] == "function" and messages[-1]["content"] != "No output": +- prompt += "Given the output of the code I just ran, " +- elif messages[-1]["role"] == "function" and messages[-1]["content"] == "No output": +- prompt += "Given the fact that the code I just ran produced no output, " +- +- +- if self.debug_mode: +- import builtins +- builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt) +- +- response = self.llama_instance( +- prompt, +- stream=True, +- temperature=self.temperature, +- stop=[""], +- max_tokens=750 +- ) ++ for _ in range(3): ++ try: ++ response = self.client.chat.completions.create( ++ model=self.model, ++ messages=messages, ++ functions=[function_schema], ++ temperature=self.temperature, ++ stream=True, ++ ) ++ break ++ except: ++ if self.debug_mode: ++ traceback.print_exc() ++ error = traceback.format_exc() ++ time.sleep(3) ++ else: ++ raise Exception(error) + + self.messages.append({}) + in_function_call = False +- llama_function_call_finished = False + self.active_block = None + + for chunk in response: +- if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0): +- continue +- +- if self.local: +- if "content" not in messages[-1]: +- chunk["choices"][0]["text"] = chunk["choices"][0]["text"].capitalize() +- messages[-1]["role"] = "assistant" +- delta = {"content": chunk["choices"][0]["text"]} +- else: +- delta = chunk["choices"][0]["delta"] ++ try: ++ chunk_dict = chunk.model_dump() ++ except Exception: ++ chunk_dict = chunk ++ ++ delta = chunk_dict.get("choices", [{}])[0].get("delta", {}) ++ finish_reason = chunk_dict.get("choices", [{}])[0].get("finish_reason") + + self.messages[-1] = merge_deltas(self.messages[-1], delta) + +- if not self.local: +- condition = "function_call" in self.messages[-1] +- elif self.local: +- if "content" in self.messages[-1]: +- condition = self.messages[-1]["content"].count("```") % 2 == 1 +- else: +- condition = False ++ condition = "function_call" in self.messages[-1] + + if condition: + if in_function_call == False: +@@ -618,68 +363,24 @@ class Emplode: + + in_function_call = True + +- if not self.local: +- if "arguments" in self.messages[-1]["function_call"]: +- arguments = self.messages[-1]["function_call"]["arguments"] +- new_parsed_arguments = parse_partial_json(arguments) +- if new_parsed_arguments: +- self.messages[-1]["function_call"][ +- "parsed_arguments"] = new_parsed_arguments +- +- elif self.local: +- if "content" in self.messages[-1]: +- +- content = self.messages[-1]["content"] +- +- if "```" in content: +- blocks = content.split("```") +- +- current_code_block = blocks[-1] +- +- lines = current_code_block.split("\n") +- +- if content.strip() == "```": +- language = None +- else: +- if lines[0] != "": +- language = lines[0].strip() +- else: +- language = "python" +- if len(lines) > 1: +- if lines[1].startswith("pip"): +- language = "shell" +- +- code = '\n'.join(lines[1:]).strip("` \n") +- +- arguments = {"code": code} +- if language: +- if language == "bash": +- language = "shell" +- arguments["language"] = language +- +- if "function_call" not in self.messages[-1]: +- self.messages[-1]["function_call"] = {} +- +- self.messages[-1]["function_call"]["parsed_arguments"] = arguments ++ if "arguments" in self.messages[-1]["function_call"]: ++ arguments = self.messages[-1]["function_call"]["arguments"] ++ new_parsed_arguments = parse_partial_json(arguments) ++ if new_parsed_arguments: ++ self.messages[-1]["function_call"][ ++ "parsed_arguments"] = new_parsed_arguments + + 
else: + if in_function_call == True: +- +- if self.local: +- +- llama_function_call_finished = True +- +- in_function_call = False ++ in_function_call = False + + if self.active_block == None: +- + self.active_block = MessageBlock() + + self.active_block.update_from_message(self.messages[-1]) + +- if chunk["choices"][0]["finish_reason"] or llama_function_call_finished: +- if chunk["choices"][ +- 0]["finish_reason"] == "function_call" or llama_function_call_finished: ++ if finish_reason: ++ if finish_reason == "function_call": + + if self.debug_mode: + print("Running function:") +@@ -712,7 +413,7 @@ class Emplode: + }) + return + +- if not self.local and "parsed_arguments" not in self.messages[-1]["function_call"]: ++ if "parsed_arguments" not in self.messages[-1]["function_call"]: + + self.messages.append({ + "role": "function", +@@ -742,9 +443,8 @@ class Emplode: + + self.respond() + +- if chunk["choices"][0]["finish_reason"] != "function_call": +- +- if self.local and "content" in self.messages[-1]: ++ else: ++ if "content" in self.messages[-1]: + self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#") + self.active_block.update_from_message(self.messages[-1]) + time.sleep(0.1) +diff --git a/emplode/get_hf_llm.py b/emplode/get_hf_llm.py +deleted file mode 100644 +index a93b02e..0000000 +--- a/emplode/get_hf_llm.py ++++ /dev/null +@@ -1,291 +0,0 @@ +-import os +-import sys +-import appdirs +-import traceback +-import inquirer +-import subprocess +-from rich import print +-from rich.markdown import Markdown +-import os +-import shutil +-from huggingface_hub import list_files_info, hf_hub_download +- +- +-def get_hf_llm(repo_id, debug_mode, context_window): +- +- if "TheBloke/CodeLlama-" not in repo_id: +- print('', Markdown(f"**Emplode** will use `{repo_id}` for local execution."), '') +- +- raw_models = list_gguf_files(repo_id) +- +- if not raw_models: +- print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?") +- return None +- +- combined_models = group_and_combine_splits(raw_models) +- +- selected_model = None +- +- if len(combined_models) > 3: +- +- choices = [ +- format_quality_choice(combined_models[0], "Small"), +- format_quality_choice(combined_models[len(combined_models) // 2], "Medium"), +- format_quality_choice(combined_models[-1], "Large"), +- "See More" +- ] +- questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)] +- answers = inquirer.prompt(questions) +- if answers["selected_model"].startswith("Small"): +- selected_model = combined_models[0]["filename"] +- elif answers["selected_model"].startswith("Medium"): +- selected_model = combined_models[len(combined_models) // 2]["filename"] +- elif answers["selected_model"].startswith("Large"): +- selected_model = combined_models[-1]["filename"] +- +- if selected_model == None: +- +- choices = [format_quality_choice(model) for model in combined_models] +- questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)] +- answers = inquirer.prompt(questions) +- for model in combined_models: +- if format_quality_choice(model) == answers["selected_model"]: +- selected_model = model["filename"] +- break +- +- if confirm_action("Use GPU? 
(Large models might crash on GPU, but will run more quickly)"): +- n_gpu_layers = -1 +- else: +- n_gpu_layers = 0 +- +- user_data_dir = appdirs.user_data_dir("Emplode") +- default_path = os.path.join(user_data_dir, "models") +- +- os.makedirs(default_path, exist_ok=True) +- +- directories_to_check = [ +- default_path, +- "llama.cpp/models/", +- os.path.expanduser("~") + "/llama.cpp/models/", +- "/" +- ] +- +- for directory in directories_to_check: +- path = os.path.join(directory, selected_model) +- if os.path.exists(path): +- model_path = path +- break +- else: +- download_path = os.path.join(default_path, selected_model) +- +- print(f"This language model was not found on your system.\n\nDownload to `{default_path}`?", "") +- if confirm_action(""): +- for model_details in combined_models: +- if model_details["filename"] == selected_model: +- selected_model_details = model_details +- +- if not enough_disk_space(selected_model_details['Size'], default_path): +- print(f"You do not have enough disk space available to download this model.") +- return None +- +- split_files = [model["filename"] for model in raw_models if selected_model in model["filename"]] +- +- if len(split_files) > 1: +- for split_file in split_files: +- split_path = os.path.join(default_path, split_file) +- if os.path.exists(split_path): +- if not confirm_action(f"Split file {split_path} already exists. Download again?"): +- continue +- hf_hub_download( +- repo_id=repo_id, +- filename=split_file, +- local_dir=default_path, +- local_dir_use_symlinks=False, +- resume_download=True) +- +- actually_combine_files(default_path, selected_model, split_files) +- else: +- hf_hub_download( +- repo_id=repo_id, +- filename=selected_model, +- local_dir=default_path, +- local_dir_use_symlinks=False, +- resume_download=True) +- +- model_path = download_path +- +- else: +- print('\n', "Download cancelled. Exiting.", '\n') +- return None +- +- print(Markdown(f"Model found at `{model_path}`")) +- +- try: +- from llama_cpp import Llama +- except: +- if debug_mode: +- traceback.print_exc() +- message = "Local LLM interface package not found. Install `llama-cpp-python`?" 
+- if confirm_action(message): +- +- import platform +- +- def check_command(command): +- try: +- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +- return True +- except subprocess.CalledProcessError: +- return False +- except FileNotFoundError: +- return False +- +- def install_llama(backend): +- env_vars = { +- "FORCE_CMAKE": "1" +- } +- +- if backend == "cuBLAS": +- env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on" +- elif backend == "hipBLAS": +- env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on" +- elif backend == "Metal": +- env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on" +- else: +- env_vars["CMAKE_ARGS"] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" +- +- try: +- subprocess.run([sys.executable, "-m", "pip", "install", "llama-cpp-python"], env={**os.environ, **env_vars}, check=True) +- except subprocess.CalledProcessError as e: +- print(f"Error during installation with {backend}: {e}") +- +- def supports_metal(): +- if platform.system() == "Darwin": +- mac_version = tuple(map(int, platform.mac_ver()[0].split('.'))) +- if mac_version >= (10, 11): +- return True +- return False +- +- if check_command(["nvidia-smi"]): +- install_llama("cuBLAS") +- elif check_command(["rocminfo"]): +- install_llama("hipBLAS") +- elif supports_metal(): +- install_llama("Metal") +- else: +- install_llama("OpenBLAS") +- +- from llama_cpp import Llama +- print('', Markdown("Finished downloading `Code-Llama` interface."), '') +- +- if platform.system() == "Darwin": +- if platform.machine() != "arm64": +- print("Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture.") +- print("The llama.ccp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac.") +- print("\nTo install the correct version of Python that supports 'arm64' architecture:") +- print("1. Download Miniforge for M1/M2:") +- print("wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh") +- print("2. Install it:") +- print("bash Miniforge3-MacOSX-arm64.sh") +- print("") +- +- else: +- print('', "Installation cancelled. 
Exiting.", '') +- return None +- +- assert os.path.isfile(model_path) +- llama_2 = Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window) +- +- return llama_2 +- +-def confirm_action(message): +- question = [ +- inquirer.Confirm('confirm', +- message=message, +- default=True), +- ] +- +- answers = inquirer.prompt(question) +- return answers['confirm'] +- +- +-import os +-import inquirer +-from huggingface_hub import list_files_info, hf_hub_download, login +-from typing import Dict, List, Union +- +-def list_gguf_files(repo_id: str) -> List[Dict[str, Union[str, float]]]: +- try: +- files_info = list_files_info(repo_id=repo_id) +- except Exception as e: +- if "authentication" in str(e).lower(): +- print("You likely need to be logged in to HuggingFace to access this language model.") +- print(f"Visit this URL to log in and apply for access to this language model: https://huggingface.co/{repo_id}") +- print("Then, log in here:") +- login() +- files_info = list_files_info(repo_id=repo_id) +- +- gguf_files = [file for file in files_info if "gguf" in file.rfilename] +- +- gguf_files = sorted(gguf_files, key=lambda x: x.size) +- +- result = [] +- for file in gguf_files: +- size_in_gb = file.size / (1024**3) +- filename = file.rfilename +- result.append({ +- "filename": filename, +- "Size": size_in_gb, +- "RAM": size_in_gb + 2.5, +- }) +- +- return result +- +-from typing import List, Dict, Union +- +-def group_and_combine_splits(models: List[Dict[str, Union[str, float]]]) -> List[Dict[str, Union[str, float]]]: +- grouped_files = {} +- +- for model in models: +- base_name = model["filename"].split('-split-')[0] +- +- if base_name in grouped_files: +- grouped_files[base_name]["Size"] += model["Size"] +- grouped_files[base_name]["RAM"] += model["RAM"] +- grouped_files[base_name]["SPLITS"].append(model["filename"]) +- else: +- grouped_files[base_name] = { +- "filename": base_name, +- "Size": model["Size"], +- "RAM": model["RAM"], +- "SPLITS": [model["filename"]] +- } +- +- return list(grouped_files.values()) +- +- +-def actually_combine_files(default_path: str, base_name: str, files: List[str]) -> None: +- files.sort() +- base_path = os.path.join(default_path, base_name) +- with open(base_path, 'wb') as outfile: +- for file in files: +- file_path = os.path.join(default_path, file) +- with open(file_path, 'rb') as infile: +- outfile.write(infile.read()) +- os.remove(file_path) +- +-def format_quality_choice(model, name_override = None) -> str: +- if name_override: +- name = name_override +- else: +- name = model['filename'] +- return f"{name} | Size: {model['Size']:.1f} GB, Estimated RAM usage: {model['RAM']:.1f} GB" +- +-def enough_disk_space(size, path) -> bool: +- _, _, free = shutil.disk_usage(path) +- +- free_gb = free / (2**30) +- +- if free_gb > size: +- return True +- +- return False +diff --git a/pyproject.toml b/pyproject.toml +index 7203053..1dfcc8f 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -10,26 +10,16 @@ readme = "README.md" + + [tool.poetry.dependencies] + python = "^3.10" +-openai = "^0.27.8" +-rich = "^13.4.2" +-tiktoken = "^0.4.0" +-astor = "^0.8.1" +-git-python = "^1.0.3" +-tokentrim = "^0.1.9" +-appdirs = "^1.4.4" +-six = "^1.16.0" +-python-dotenv = "^1.0.0" +- +-inquirer = "^3.1.3" +-wget = "^3.2" +-huggingface-hub = "^0.16.4" +-litellm = "^0.1.590" +-[tool.poetry.dependencies.pyreadline3] +-version = "^3.4.1" +-markers = "sys_platform == 'win32'" ++openai = "^1.0.0" ++rich = "*" ++tiktoken = "*" ++tokentrim = "*" 
++python-dotenv = "*" ++requests = "*" ++packaging = "*" + + [tool.poetry.group.dev.dependencies] +-pytest = "^7.4.0" ++pytest = "*" + + [build-system] + requires = ["poetry-core>=1.0.0"] +-- +2.30.2 + diff --git a/.capy/pr-body-gpt5-only.md b/.capy/pr-body-gpt5-only.md new file mode 100644 index 0000000..91a65fd --- /dev/null +++ b/.capy/pr-body-gpt5-only.md @@ -0,0 +1,31 @@ +Title: Migrate Emplode to GPT-5 only, remove LiteLLM, and update deps + +Summary +- Standardize the project on a single LLM: GPT-5. Removed all alternative model paths (LiteLLM, Azure/OpenAI switches, local HuggingFace/Code Llama, Falcon) and simplified CLI and runtime accordingly. +- Replace LiteLLM with the official OpenAI client and streaming chat completions; refactor message streaming to keep existing UX (MessageBlock/CodeBlock) intact. +- Update Poetry dependencies to latest-compatible constraints and remove unused libs; refresh README to match the new, simplified flow. + +Details +- Core: + - Default model set to `gpt-5`; all model selection flags and local/azure code paths removed. + - Switched from LiteLLM to `openai` client (>=1.x) with streaming + function-calling (`run_code`). + - Unified message trimming to a token-window approach to avoid model-name coupling. +- CLI: + - Simplified to only `--yes`, `--debug`, and `--version`. + - Removed `--fast`, `--local`, `--falcon`, `--model`, `--api_base`, and `--use-azure`. +- Deps: + - Removed: `litellm`, `huggingface-hub`, `inquirer`, `appdirs`, `wget`, `six`, `git-python`, `astor`. + - Added: `requests`, `packaging`. Updated constraints for `openai`, `rich`, `tiktoken`, `tokentrim`, `python-dotenv`. +- Docs: + - README now states GPT-5-only usage and the new CLI flags; removed local/HF and fast-mode sections. + +Impact +- Leaner, single-path runtime; fewer moving parts and less configuration. +- No more local/HF model downloads or Azure branching; requires only `OPENAI_API_KEY`. +- Dependency surface reduced; easier to maintain and upgrade going forward. + +Notes +- This change removes all alternate model support intentionally per request; if a compatibility shim is desired (e.g., token counting for unknown models), we can add that in a follow-up. + + +₍ᐢ•(ܫ)•ᐢ₎ Generated by [Capy](https://capy.ai) ([view task](https://capy.ai/project/5719ac6b-84af-11f0-a94e-3eef481a796b/task/768ae859-543c-481f-b4a9-514f56c81a6f)) \ No newline at end of file diff --git a/README.md b/README.md index bb8dfcc..808f581 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,28 @@ -

[README header hunk garbled in extraction: the HTML markup was stripped. Recoverable content: the old centered header (a logo image path ending in "/Emplode.", a Discord badge, and the tagline "Agent that performs action on your system by executing code.") is removed, and a plain "Emplode" heading is added in its place.]
- -**Emplode** performs actions on your system by executing code locally. You can chat with Emplode in your terminal by running `emplode` after installing. - -This provides a natural-language interface to your system's general-purpose capabilities: - -- Create, edit and arrange files. -- Control a browser to perform research -- Plot, clean, and analyze large datasets -- ...etc. - -
+Simple terminal agent that executes code on your machine. ## Quick Start ```shell pip install emplode -``` - -### Terminal - -After installation, simply run `emplode`: - -```shell emplode ``` -### Python +## Python ```python import emplode - -emplode.chat("Organize all images in my downloads folder into subfolders by year, naming each folder after the year.") # Executes a single command -emplode.chat() # Starts an interactive chat +emplode.chat("Organize my downloads by year.") +emplode.chat() ``` -## Commands - -Emplode now uses a single model, `gpt-5`, everywhere. There is no model selection and no local model support. - -- `-y`, `--yes`: execute code without user confirmation -- `-d`, `--debug`: prints extra information -- `--version`: display current Emplode version - -### Configuration with .env - -Emplode allows you to set default behaviors using a .env file. This provides a flexible way to configure it without changing command-line arguments every time. - -Here's a sample .env configuration: - -``` -EMPLODE_CLI_AUTO_RUN=False -EMPLODE_CLI_DEBUG=False -``` +## CLI -You can modify these values in the .env file to change the default behavior of Emplode. +Only one flag is supported: -## How Does it Work? +- `-y` / `--yes`: run code without asking for confirmation. -Emplode equips a function-calling model with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run. +## How it works -
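Concretely, the round trip that the next sentence summarizes ("the code is executed locally and the output is returned to the model") is a three-part exchange: an assistant message carrying a `run_code` function call, local execution, and a `function`-role message carrying the output. A hedged sketch, with `execute()` as a hypothetical stand-in for the real per-language executors:

```python
import json
import subprocess

def execute(language: str, code: str) -> str:
    # Hypothetical stand-in for the per-language executors; shell only.
    if language == "shell":
        result = subprocess.run(code, shell=True, capture_output=True, text=True)
        return (result.stdout + result.stderr).strip() or "No output"
    raise NotImplementedError(language)

# What the model sends: a run_code call with JSON-encoded arguments.
assistant_message = {
    "role": "assistant",
    "function_call": {
        "name": "run_code",
        "arguments": json.dumps({"language": "shell", "code": "ls"}),
    },
}

args = json.loads(assistant_message["function_call"]["arguments"])
output = execute(args["language"], args["code"])

# What goes back: a function-role message with the output, so the next
# completion can continue with the result in context.
function_message = {"role": "function", "name": "run_code", "content": output}
print(function_message)
```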
+Emplode uses a function-calling model (gpt-5) with a single function `run_code(language, code)`. When the model calls the function, the code is executed locally and the output is returned to the model. diff --git a/emplode/cli.py b/emplode/cli.py index 6b7c94a..640fb0d 100644 --- a/emplode/cli.py +++ b/emplode/cli.py @@ -1,60 +1,11 @@ import argparse -import os -from dotenv import load_dotenv -import requests -from packaging import version -import pkg_resources -from rich import print as rprint -from rich.markdown import Markdown - -load_dotenv() - -def check_for_update(): - response = requests.get(f'https://pypi.org/pypi/emplode/json') - latest_version = response.json()['info']['version'] - - current_version = pkg_resources.get_distribution("emplode").version - - return version.parse(latest_version) > version.parse(current_version) def cli(emplode): - - try: - if check_for_update(): - print("A new version is available. Please run 'pip install --upgrade emplode'.") - except: - pass - - AUTO_RUN = os.getenv('EMPLODE_CLI_AUTO_RUN', 'False') == 'True' - DEBUG = os.getenv('EMPLODE_CLI_DEBUG', 'False') == 'True' - - parser = argparse.ArgumentParser(description='Command Emplode.') - - parser.add_argument('-y', - '--yes', - action='store_true', - default=AUTO_RUN, - help='execute code without user confirmation') - parser.add_argument('-d', - '--debug', - action='store_true', - default=DEBUG, - help='prints extra information') - - parser.add_argument('--version', - action='store_true', - help='display current Emplode version') - + parser = argparse.ArgumentParser(description='Emplode') + parser.add_argument('-y', '--yes', action='store_true', help='execute code without confirmation') args = parser.parse_args() - if args.version: - print("Emplode", pkg_resources.get_distribution("emplode").version) - return - if args.yes: emplode.auto_run = True - if args.debug: - emplode.debug_mode = True - emplode.chat() diff --git a/emplode/emplode.py b/emplode/emplode.py index 3daab29..2ea8a7d 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -11,7 +11,6 @@ import platform from openai import OpenAI import getpass -import requests import readline import tokentrim as tt from rich import print @@ -83,24 +82,6 @@ def get_info_for_system_message(self): info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}" - query = [] - for message in self.messages[-2:]: - message_for_semantic_search = {"role": message.get("role", "assistant")} - if "content" in message: - message_for_semantic_search["content"] = message["content"] - if "function_call" in message and "parsed_arguments" in message["function_call"]: - message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"] - query.append(message_for_semantic_search) - - url = "https://open-procedures.replit.app/search/" - - try: - relevant_procedures = requests.get(url, data=json.dumps(query)).json().get("procedures", []) - if relevant_procedures: - info += "\n\n# Recommended Procedures\n" + "\n---\n".join(relevant_procedures) + "\nIn your plan, include steps and, if present, **EXACT CODE SNIPPETS** (especially for depracation notices, **WRITE THEM INTO YOUR PLAN -- underneath each numbered step** as they will VANISH once you execute your first line of code, so WRITE THEM DOWN NOW if you need them) from the above procedures if they are relevant to the task. 
Again, include **VERBATIM CODE SNIPPETS** from the procedures above if they are relevent to the task **directly in your plan.**" - except: - pass - return info def reset(self): @@ -151,13 +132,7 @@ def handle_help(self, arguments): for cmd, desc in commands_description.items(): base_message.append(f"- `{cmd}`: {desc}\n") - additional_info = [ - "\n\nFor further assistance, please join our community Discord or consider contributing to the project's development." - ] - - full_message = base_message + additional_info - - print(Markdown("".join(full_message))) + print(Markdown("".join(base_message))) def handle_debug(self, arguments=None): diff --git a/pyproject.toml b/pyproject.toml index 1dfcc8f..bf38ad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,13 +10,10 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" -openai = "^1.0.0" -rich = "*" -tiktoken = "*" -tokentrim = "*" -python-dotenv = "*" -requests = "*" -packaging = "*" +openai = "^1.106.1" +rich = "^14.1.0" +tiktoken = "^0.11.0" +tokentrim = "^0.1.13" [tool.poetry.group.dev.dependencies] pytest = "*" From 6225ab9a2a132076af220919de239fdec9a92a6f Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 07:39:30 +0000 Subject: [PATCH 3/9] Capy jam: Set temperature=1.0 and omit temperature param for GPT-5 compatibility --- emplode/emplode.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index 2ea8a7d..9f47b5e 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -52,7 +52,7 @@ class Emplode: def __init__(self): self.messages = [] - self.temperature = 0.001 + self.temperature = 1.0 self.api_key = None self.auto_run = False self.model = "gpt-5" @@ -296,7 +296,6 @@ def respond(self): model=self.model, messages=messages, functions=[function_schema], - temperature=self.temperature, stream=True, ) break From 9b13d95acdde68fb86773874a002fdb0541bef99 Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 07:42:31 +0000 Subject: [PATCH 4/9] Capy jam: Add non-streaming fallback when streaming is not permitted by org; keep unified processing path --- emplode/emplode.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index 9f47b5e..a645e48 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -10,6 +10,7 @@ import json import platform from openai import OpenAI +from openai import BadRequestError import getpass import readline import tokentrim as tt @@ -290,16 +291,36 @@ def respond(self): error = "" - for _ in range(3): - try: - response = self.client.chat.completions.create( + def request(use_stream=True): + if use_stream: + return self.client.chat.completions.create( model=self.model, messages=messages, functions=[function_schema], stream=True, ) + else: + r = self.client.chat.completions.create( + model=self.model, + messages=messages, + functions=[function_schema], + stream=False, + ) + choice = r.choices[0] + msg = choice.message.model_dump() + return [{"choices":[{"delta": msg, "finish_reason": choice.finish_reason}]}] + + for _ in range(3): + try: + try: + response = request(use_stream=True) + except BadRequestError as e: + if "must be verified to stream this model" in str(e).lower() or "param': 'stream'" in str(e): + response = request(use_stream=False) + else: + raise break - except: + except Exception: if self.debug_mode: traceback.print_exc() error = traceback.format_exc() From 5fd6e4e351366d9b4e8ebaf37573e8ac402e6795 Mon Sep 
17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 07:45:44 +0000 Subject: [PATCH 5/9] Capy jam: After running code, stop recursion to avoid unsupported 'function' role for GPT-5; end after execution --- emplode/emplode.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index a645e48..bc90918 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -416,7 +416,6 @@ def request(use_stream=True): "content": """Your function call could not be parsed. Please use ONLY the `run_code` function, which takes two parameters: `code` and `language`. Your response should be formatted as a JSON.""" }) - self.respond() return language = self.messages[-1]["function_call"]["parsed_arguments"][ @@ -436,7 +435,7 @@ def request(use_stream=True): "content": self.active_block.output if self.active_block.output else "No output" }) - self.respond() + return else: if "content" in self.messages[-1]: From 6f8833c9588b3a0f13395225b09cd001ec184676 Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Mon, 8 Sep 2025 15:52:58 +0000 Subject: [PATCH 6/9] Capy jam: Switch to tools API with non-stream completions for GPT-5; map tool_calls to function_call; harden parsing to avoid None errors; remove temperature usage --- emplode/emplode.py | 236 +++++++++++++++------------------------------ 1 file changed, 78 insertions(+), 158 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index bc90918..e339ead 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -20,15 +20,13 @@ function_schema = { "name": "run_code", - "description": - "Executes code on the user's machine and returns the output", + "description": "Executes code on the user's machine and returns the output", "parameters": { "type": "object", "properties": { "language": { "type": "string", - "description": - "The programming language", + "description": "The programming language", "enum": ["python", "R", "shell", "applescript", "javascript", "html"] }, "code": { @@ -279,7 +277,6 @@ def end_active_block(self): def respond(self): info = self.get_info_for_system_message() - system_message = self.system_message + "\n\n" + info messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) @@ -289,162 +286,85 @@ def respond(self): print(messages) print() - error = "" - - def request(use_stream=True): - if use_stream: - return self.client.chat.completions.create( - model=self.model, - messages=messages, - functions=[function_schema], - stream=True, - ) - else: - r = self.client.chat.completions.create( - model=self.model, - messages=messages, - functions=[function_schema], - stream=False, - ) - choice = r.choices[0] - msg = choice.message.model_dump() - return [{"choices":[{"delta": msg, "finish_reason": choice.finish_reason}]}] - - for _ in range(3): - try: - try: - response = request(use_stream=True) - except BadRequestError as e: - if "must be verified to stream this model" in str(e).lower() or "param': 'stream'" in str(e): - response = request(use_stream=False) - else: - raise - break - except Exception: - if self.debug_mode: - traceback.print_exc() - error = traceback.format_exc() - time.sleep(3) - else: - raise Exception(error) - - self.messages.append({}) - in_function_call = False - self.active_block = None - - for chunk in response: - try: - chunk_dict = chunk.model_dump() - except Exception: - chunk_dict = chunk - - delta = chunk_dict.get("choices", [{}])[0].get("delta", {}) - finish_reason = chunk_dict.get("choices", 
[{}])[0].get("finish_reason") - - self.messages[-1] = merge_deltas(self.messages[-1], delta) - - condition = "function_call" in self.messages[-1] - - if condition: - if in_function_call == False: - - self.end_active_block() - - last_role = self.messages[-2]["role"] - if last_role == "user" or last_role == "function": - print() - - self.active_block = CodeBlock() - - in_function_call = True - - if "arguments" in self.messages[-1]["function_call"]: - arguments = self.messages[-1]["function_call"]["arguments"] - new_parsed_arguments = parse_partial_json(arguments) - if new_parsed_arguments: - self.messages[-1]["function_call"][ - "parsed_arguments"] = new_parsed_arguments - - else: - if in_function_call == True: - in_function_call = False - - if self.active_block == None: - self.active_block = MessageBlock() - - self.active_block.update_from_message(self.messages[-1]) - - if finish_reason: - if finish_reason == "function_call": - - if self.debug_mode: - print("Running function:") - print(self.messages[-1]) - print("---") - - if self.auto_run == False: - - self.active_block.end() - language = self.active_block.language - code = self.active_block.code - - response = input(" Would you like to run this code? (y/n)\n\n ") - print("") - - if response.strip().lower() == "y": - self.active_block = CodeBlock() - self.active_block.language = language - self.active_block.code = code - - else: - self.active_block.end() - self.messages.append({ - "role": - "function", - "name": - "run_code", - "content": - "User decided not to run this code." - }) - return - - if "parsed_arguments" not in self.messages[-1]["function_call"]: - - self.messages.append({ - "role": "function", - "name": "run_code", - "content": """Your function call could not be parsed. Please use ONLY the `run_code` function, which takes two parameters: `code` and `language`. 
Your response should be formatted as a JSON.""" - }) - - return - - language = self.messages[-1]["function_call"]["parsed_arguments"][ - "language"] - if language not in self.code_emplodes: - self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) - code_emplode = self.code_emplodes[language] - - code_emplode.active_block = self.active_block - code_emplode.run() - - self.active_block.end() - - self.messages.append({ - "role": "function", - "name": "run_code", - "content": self.active_block.output if self.active_block.output else "No output" - }) - + # Prefer non-streaming for GPT-5 and use tools API + try: + r = self.client.chat.completions.create( + model=self.model, + messages=messages, + tools=[{"type": "function", "function": function_schema}], + stream=False, + ) + except BadRequestError as e: + # If tools are not supported, fall back to no tools + if self.debug_mode: + traceback.print_exc() + r = self.client.chat.completions.create( + model=self.model, + messages=messages, + stream=False, + ) + + choice = r.choices[0] + msg = choice.message + + # Build a synthetic assistant message for our transcript + assistant_msg = {"role": "assistant"} + + # Handle tool calls (function calling) + tool_calls = getattr(msg, "tool_calls", None) + if tool_calls: + fn = tool_calls[0].function + arguments = fn.arguments or "" + assistant_msg["function_call"] = {"name": fn.name, "arguments": arguments} + self.messages.append(assistant_msg) + + # Parse arguments safely + parsed = parse_partial_json(arguments) or {} + language = parsed.get("language") + code = parsed.get("code") + if not language or not code: + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": "Your function call could not be parsed. It must include JSON with 'language' and 'code'."}) + self.active_block.end() + return + + # Show code block + self.end_active_block() + print() + self.active_block = CodeBlock() + self.active_block.language = language + self.active_block.code = code + self.active_block.refresh() + + if self.auto_run is False: + self.active_block.end() + resp = input(" Would you like to run this code? 
(y/n)\n\n ") + print("") + if resp.strip().lower() != "y": return + self.active_block = CodeBlock() + self.active_block.language = language + self.active_block.code = code + + # Execute + if language not in self.code_emplodes: + self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) + code_emplode = self.code_emplodes[language] + code_emplode.active_block = self.active_block + code_emplode.run() + self.active_block.end() + return - else: - if "content" in self.messages[-1]: - self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#") - self.active_block.update_from_message(self.messages[-1]) - time.sleep(0.1) + # Otherwise, plain assistant content + content = msg.content or "" + assistant_msg["content"] = content + self.messages.append(assistant_msg) - self.active_block.end() - return + self.end_active_block() + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": content}) + self.active_block.end() + return def _print_welcome_message(self): print("", "", Markdown(f"\nWelcome to **Emplode**.\n"), "") From 0a88d28841c6a6225310e262126caf0edaa79bde Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Tue, 9 Sep 2025 05:45:01 +0000 Subject: [PATCH 7/9] Capy jam: Use OpenAI Responses API with tools; add strict JSON schema (additionalProperties=false); implement live streaming with fallback; robust tool-call handling --- emplode/emplode.py | 374 ++++++++++++++++++++++----------------------- 1 file changed, 186 insertions(+), 188 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index e339ead..36bd964 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -18,7 +18,9 @@ from rich.markdown import Markdown from rich.rule import Rule -function_schema = { +# Responses API tool definition for function-calling (strict JSON Schema) +RUN_CODE_TOOL = { + "type": "function", "name": "run_code", "description": "Executes code on the user's machine and returns the output", "parameters": { @@ -34,16 +36,15 @@ "description": "The code to execute" } }, - "required": ["language", "code"] - }, + "required": ["language", "code"], + "additionalProperties": False + } } -missing_api_key_message = "> OpenAI API key not found\n\nTo use `GPT-5` please provide an OpenAI API key.\n" +missing_api_key_message = "> OpenAI API key not found. Provide an OpenAI API key to continue.\n" confirm_mode_message = """ -**Emplode** will require approval before running code. Use `emplode -y` to bypass this. - -Press `CTRL-C` to exit. +Emplode will require approval before running code. Use `emplode -y` to bypass this. 
""" @@ -51,7 +52,6 @@ class Emplode: def __init__(self): self.messages = [] - self.temperature = 1.0 self.api_key = None self.auto_run = False self.model = "gpt-5" @@ -63,25 +63,17 @@ def __init__(self): self.system_message = f.read().strip() self.code_emplodes = {} - self.active_block = None - self.client = None def cli(self): cli(self) def get_info_for_system_message(self): - - info = "" - username = getpass.getuser() - current_working_directory = os.getcwd() - operating_system = platform.system() - - info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}" - - return info + cwd = os.getcwd() + os_name = platform.system() + return f"[User Info]\nName: {username}\nCWD: {cwd}\nOS: {os_name}" def reset(self): self.messages = [] @@ -91,59 +83,48 @@ def load(self, messages): self.messages = messages def handle_undo(self, arguments): - if len(self.messages) == 0: return last_user_index = None for i, message in enumerate(self.messages): - if message.get('role') == 'user': - last_user_index = i - - removed_messages = [] - + if message.get('role') == 'user': + last_user_index = i + removed = [] if last_user_index is not None: - removed_messages = self.messages[last_user_index:] - self.messages = self.messages[:last_user_index] - - print("") - - for message in removed_messages: - if 'content' in message and message['content'] != None: - print(Markdown(f"**Removed message:** `\"{message['content'][:30]}...\"`")) - elif 'function_call' in message: - print(Markdown(f"**Removed codeblock**")) - - print("") + removed = self.messages[last_user_index:] + self.messages = self.messages[:last_user_index] + print("") + for m in removed: + if 'content' in m and m['content'] is not None: + print(Markdown(f"**Removed message:** `\"{m['content'][:30]}...\"`")) + elif 'function_call' in m: + print(Markdown("**Removed codeblock**")) + print("") + def handle_help(self, arguments): - commands_description = { - "%debug [true/false]": "Toggle debug mode. Without arguments or with 'true', it enters debug mode. With 'false', it exits debug mode.", - "%reset": "Resets the current session.", - "%undo": "Remove previous messages and its response from the message history.", - "%save_message [path]": "Saves messages to a specified JSON path. If no path is provided, it defaults to 'messages.json'.", - "%load_message [path]": "Loads messages from a specified JSON path. 
If no path is provided, it defaults to 'messages.json'.", + items = { + "%debug [true/false]": "Toggle debug mode.", + "%reset": "Reset the current session.", + "%undo": "Remove the previous user message and response.", + "%save_message [path]": "Save messages to JSON.", + "%load_message [path]": "Load messages from JSON.", "%help": "Show this help message.", } - - base_message = [ - "> **Available Commands:**\n\n" - ] - - for cmd, desc in commands_description.items(): - base_message.append(f"- `{cmd}`: {desc}\n") - - print(Markdown("".join(base_message))) - + base = ["> **Available Commands:**\n\n"] + for cmd, desc in items.items(): + base.append(f"- `{cmd}`: {desc}\n") + print(Markdown("".join(base))) def handle_debug(self, arguments=None): if arguments == "" or arguments == "true": - print(Markdown("> Entered debug mode")) - print(self.messages) - self.debug_mode = True + print(Markdown("> Entered debug mode")) + print(self.messages) + self.debug_mode = True elif arguments == "false": - print(Markdown("> Exited debug mode")) - self.debug_mode = False + print(Markdown("> Exited debug mode")) + self.debug_mode = False else: - print(Markdown("> Unknown argument to debug command.")) + print(Markdown("> Unknown argument to debug command.")) def handle_reset(self, arguments): self.reset() @@ -160,7 +141,6 @@ def handle_save_message(self, json_path): json_path += ".json" with open(json_path, 'w') as f: json.dump(self.messages, f, indent=2) - print(Markdown(f"> messages json export to {os.path.abspath(json_path)}")) def handle_load_message(self, json_path): @@ -170,7 +150,6 @@ def handle_load_message(self, json_path): json_path += ".json" with open(json_path, 'r') as f: self.load(json.load(f)) - print(Markdown(f"> messages json loaded from {os.path.abspath(json_path)}")) def handle_command(self, user_input): @@ -182,60 +161,38 @@ def handle_command(self, user_input): "load_message": self.handle_load_message, "undo": self.handle_undo, } - - user_input = user_input[1:].strip() + user_input = user_input[1:].strip() command = user_input.split(" ")[0] arguments = user_input[len(command):].strip() - action = switch.get(command, - self.default_handle) - action(arguments) + switch.get(command, self.default_handle)(arguments) def chat(self, message=None, return_messages=False): - self.verify_api_key() - welcome_message = "" - + welcome = "" if self.debug_mode: - welcome_message += "> Entered debug mode" - + welcome += "> Entered debug mode" + welcome += f"\n> Model set to `{self.model.upper()}`" if not self.auto_run: - notice_model = f"{self.model.upper()}" - welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To auto-run code, use `emplode -y`" - - if not self.auto_run: - welcome_message += "\n\n" + confirm_mode_message - - welcome_message = welcome_message.strip() - - if welcome_message != "": - if welcome_message.startswith(">"): - print(Markdown(welcome_message), '') - else: - print('', Markdown(welcome_message), '') + welcome += f"\n\n{confirm_mode_message}" + welcome = welcome.strip() + if welcome: + print(Markdown(welcome), '') if message: self.messages.append({"role": "user", "content": message}) self.respond() - else: while True: try: user_input = input("> ").strip() - except EOFError: - break - except KeyboardInterrupt: - print() + except (EOFError, KeyboardInterrupt): + print() break - - readline.add_history(user_input) - if user_input.startswith("%") or user_input.startswith("/"): self.handle_command(user_input) continue - self.messages.append({"role": "user", "content": 
user_input}) - try: self.respond() except KeyboardInterrupt: @@ -244,29 +201,17 @@ def chat(self, message=None, return_messages=False): self.end_active_block() if return_messages: - return self.messages + return self.messages def verify_api_key(self): if self.api_key is None: - if 'OPENAI_API_KEY' in os.environ: - self.api_key = os.environ['OPENAI_API_KEY'] - else: - self._print_welcome_message() - time.sleep(1) - - print(Rule(style="white")) - - print(Markdown(missing_api_key_message), '', Rule(style="white"), '') - response = input("OpenAI API key: ") - - if response == "": + key = os.environ.get('OPENAI_API_KEY') + if not key: + print(Markdown(missing_api_key_message)) + key = input("OpenAI API key: ").strip() + if not key: raise Exception("OpenAI API key is required to use Emplode with GPT-5.") - else: - self.api_key = response - print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '') - time.sleep(2) - print(Rule(style="white")) - + self.api_key = key if self.client is None: self.client = OpenAI(api_key=self.api_key) @@ -275,96 +220,149 @@ def end_active_block(self): self.active_block.end() self.active_block = None - def respond(self): - info = self.get_info_for_system_message() - system_message = self.system_message + "\n\n" + info + def _stream_with_responses(self, sys_and_messages): + content_buf = "" + tool_name = None + tool_args_buf = "" - messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message) - - if self.debug_mode: - print("\n", "Sending `messages` to LLM:", "\n") - print(messages) - print() - - # Prefer non-streaming for GPT-5 and use tools API + # Live stream try: - r = self.client.chat.completions.create( + with self.client.responses.stream( model=self.model, - messages=messages, - tools=[{"type": "function", "function": function_schema}], - stream=False, - ) - except BadRequestError as e: - # If tools are not supported, fall back to no tools - if self.debug_mode: - traceback.print_exc() - r = self.client.chat.completions.create( + input=sys_and_messages, + tools=[RUN_CODE_TOOL], + ) as stream: + for event in stream: + t = getattr(event, 'type', '') + # Text deltas + if 'output_text.delta' in t: + delta = getattr(event, 'delta', '') or getattr(event, 'text', '') + if delta: + content_buf += delta + if not isinstance(self.active_block, MessageBlock): + self.end_active_block() + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": content_buf}) + # Tool call incremental pieces + elif 'tool_call.delta' in t: + d = getattr(event, 'delta', None) + if isinstance(d, dict): + if not tool_name and d.get('name'): + tool_name = d['name'] + if d.get('arguments'): + tool_args_buf += d['arguments'] + elif isinstance(d, str): + tool_args_buf += d + # Tool call finished + elif 'tool_call.completed' in t: + # Execute tool now + self._execute_run_code(tool_name, tool_args_buf) + return + # finalize response (ensures any remaining chunks are processed) + _ = stream.get_final_response() + except BadRequestError: + # Fallback to non-stream + r = self.client.responses.create( model=self.model, - messages=messages, + input=sys_and_messages, + tools=[RUN_CODE_TOOL], stream=False, ) + return self._handle_nonstream_response(r) - choice = r.choices[0] - msg = choice.message - - # Build a synthetic assistant message for our transcript - assistant_msg = {"role": "assistant"} - - # Handle tool calls 
(function calling) - tool_calls = getattr(msg, "tool_calls", None) - if tool_calls: - fn = tool_calls[0].function - arguments = fn.arguments or "" - assistant_msg["function_call"] = {"name": fn.name, "arguments": arguments} - self.messages.append(assistant_msg) - - # Parse arguments safely - parsed = parse_partial_json(arguments) or {} - language = parsed.get("language") - code = parsed.get("code") - if not language or not code: - self.active_block = MessageBlock() - self.active_block.update_from_message({"content": "Your function call could not be parsed. It must include JSON with 'language' and 'code'."}) - self.active_block.end() - return + def _handle_nonstream_response(self, r): + # Try to read tool calls; structure can vary by SDK version + try: + out = getattr(r, 'output', None) or [] + except Exception: + out = [] + # Search for tool call + tool_name = None + tool_args = None + for item in out: + t = getattr(item, 'type', None) + if t == 'tool_call': + f = getattr(item, 'tool_call', None) + if f and getattr(f, 'type', '') == 'function': + tool_name = getattr(f, 'name', None) + tool_args = getattr(f, 'arguments', None) + break + if t == 'message' and hasattr(item, 'content'): + # Plain assistant text + text_parts = [] + for c in getattr(item, 'content', []) or []: + if getattr(c, 'type', None) == 'output_text': + text_parts.append(getattr(c, 'text', '') or '') + text = ''.join(text_parts) + if text: + self.end_active_block() + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": text}) + self.active_block.end() + return - # Show code block + if tool_name: + self._execute_run_code(tool_name, tool_args or "") + return + + # If we got here, just show best-effort text + try: + text = getattr(r, 'output_text', '') or '' + except Exception: + text = '' + if text: self.end_active_block() - print() + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": text}) + self.active_block.end() + + def _execute_run_code(self, tool_name, raw_args): + if tool_name != 'run_code': + return + parsed = parse_partial_json(raw_args or "") or {} + language = parsed.get('language') + code = parsed.get('code') + if not language or not code: + self.end_active_block() + self.active_block = MessageBlock() + self.active_block.update_from_message({"content": "Tool arguments missing 'language' or 'code'."}) + self.active_block.end() + return + # Show code + self.end_active_block() + print() + self.active_block = CodeBlock() + self.active_block.language = language + self.active_block.code = code + self.active_block.refresh() + if self.auto_run is False: + self.active_block.end() + resp = input(" Would you like to run this code? (y/n)\n\n ") + print("") + if resp.strip().lower() != 'y': + return self.active_block = CodeBlock() self.active_block.language = language self.active_block.code = code - self.active_block.refresh() + if language not in self.code_emplodes: + self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) + ce = self.code_emplodes[language] + ce.active_block = self.active_block + ce.run() + self.active_block.end() - if self.auto_run is False: - self.active_block.end() - resp = input(" Would you like to run this code? 
(y/n)\n\n ") - print("") - if resp.strip().lower() != "y": - return - self.active_block = CodeBlock() - self.active_block.language = language - self.active_block.code = code - - # Execute - if language not in self.code_emplodes: - self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) - code_emplode = self.code_emplodes[language] - code_emplode.active_block = self.active_block - code_emplode.run() - self.active_block.end() - return + def respond(self): + info = self.get_info_for_system_message() + system_message = self.system_message + "\n\n" + info - # Otherwise, plain assistant content - content = msg.content or "" - assistant_msg["content"] = content - self.messages.append(assistant_msg) + # Trim conversation to fit + trimmed = tt.trim(self.messages, max_tokens=(self.context_window - self.max_tokens - 25), system_message=system_message) - self.end_active_block() - self.active_block = MessageBlock() - self.active_block.update_from_message({"content": content}) - self.active_block.end() - return + # Convert to Responses API input + sys_and_messages = [{"role": "system", "content": system_message}] + trimmed[1:] + + # Stream first; fallback to non-stream automatically + self._stream_with_responses(sys_and_messages) def _print_welcome_message(self): print("", "", Markdown(f"\nWelcome to **Emplode**.\n"), "") From 1d6761653b7caef2faf8f60eb7835d11698c23fb Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Tue, 9 Sep 2025 06:26:39 +0000 Subject: [PATCH 8/9] Capy jam: Detect markdown code fences in streamed/non-streamed responses and execute when tool call is absent; improves GPT-5 code execution --- emplode/emplode.py | 49 ++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/emplode/emplode.py b/emplode/emplode.py index 36bd964..7dabab9 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -9,6 +9,7 @@ import traceback import json import platform +import re from openai import OpenAI from openai import BadRequestError import getpass @@ -220,6 +221,17 @@ def end_active_block(self): self.active_block.end() self.active_block = None + def _extract_last_code_block(self, text): + pattern = re.compile(r"```([a-zA-Z]+)?\n([\s\S]*?)```", re.DOTALL) + matches = list(pattern.finditer(text or "")) + if not matches: + return None, None + lang = matches[-1].group(1) or "python" + code = matches[-1].group(2) or "" + if lang == "bash": + lang = "shell" + return lang, code.strip() + def _stream_with_responses(self, sys_and_messages): content_buf = "" tool_name = None @@ -255,10 +267,15 @@ def _stream_with_responses(self, sys_and_messages): tool_args_buf += d # Tool call finished elif 'tool_call.completed' in t: - # Execute tool now self._execute_run_code(tool_name, tool_args_buf) return - # finalize response (ensures any remaining chunks are processed) + # Response finished + elif t.endswith('completed') or t == 'response.completed': + # If model wrote a code block instead of tool call, run it + lang, code = self._extract_last_code_block(content_buf) + if lang and code: + self._execute_run_code('run_code', json.dumps({"language": lang, "code": code})) + return _ = stream.get_final_response() except BadRequestError: # Fallback to non-stream @@ -279,6 +296,7 @@ def _handle_nonstream_response(self, r): # Search for tool call tool_name = None tool_args = None + text_accum = "" for item in out: t = getattr(item, 'type', None) if t == 'tool_call': @@ -288,32 +306,21 @@ def _handle_nonstream_response(self, r): tool_args = getattr(f, 
'arguments', None) break if t == 'message' and hasattr(item, 'content'): - # Plain assistant text - text_parts = [] for c in getattr(item, 'content', []) or []: if getattr(c, 'type', None) == 'output_text': - text_parts.append(getattr(c, 'text', '') or '') - text = ''.join(text_parts) - if text: - self.end_active_block() - self.active_block = MessageBlock() - self.active_block.update_from_message({"content": text}) - self.active_block.end() - return - + text_accum += getattr(c, 'text', '') or '' if tool_name: self._execute_run_code(tool_name, tool_args or "") return - - # If we got here, just show best-effort text - try: - text = getattr(r, 'output_text', '') or '' - except Exception: - text = '' - if text: + if text_accum: + # Try to run code fence if present + lang, code = self._extract_last_code_block(text_accum) + if lang and code: + self._execute_run_code('run_code', json.dumps({"language": lang, "code": code})) + return self.end_active_block() self.active_block = MessageBlock() - self.active_block.update_from_message({"content": text}) + self.active_block.update_from_message({"content": text_accum}) self.active_block.end() def _execute_run_code(self, tool_name, raw_args): From 9243a5957e41295e2fed12882d6e4689bed4e2d7 Mon Sep 17 00:00:00 2001 From: shouryamaanjain Date: Tue, 9 Sep 2025 06:51:42 +0000 Subject: [PATCH 9/9] Capy jam: Add autonomous error recovery and auto-install for missing Python modules; env toggles EMPLODE_AUTO_FIX_LIMIT and EMPLODE_AUTO_INSTALL; implement direct re-run after install --- emplode/emplode.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/emplode/emplode.py b/emplode/emplode.py index 7dabab9..54dbd72 100644 --- a/emplode/emplode.py +++ b/emplode/emplode.py @@ -59,6 +59,10 @@ def __init__(self): self.debug_mode = False self.context_window = 200000 self.max_tokens = 750 + self.max_auto_fixes = int(os.getenv('EMPLODE_AUTO_FIX_LIMIT', '5')) + self._auto_fix_count = 0 + self.auto_install = os.getenv('EMPLODE_AUTO_INSTALL', 'true').lower() in ('1','true','yes','y') + self._install_attempted = set() here = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(here, 'system_message.txt'), 'r') as f: self.system_message = f.read().strip() @@ -169,6 +173,7 @@ def handle_command(self, user_input): def chat(self, message=None, return_messages=False): self.verify_api_key() + self._auto_fix_count = 0 welcome = "" if self.debug_mode: @@ -323,6 +328,74 @@ def _handle_nonstream_response(self, r): self.active_block.update_from_message({"content": text_accum}) self.active_block.end() + def _is_error_output(self, output): + out = (output or "").lower() + patterns = [ + "traceback (most recent call last)", + "error:", + "exception:", + "command not found", + "no such file or directory", + "module not found", + "moduleNotFoundError".lower(), + "nameerror:", + "syntaxerror:", + "typeerror:", + "valueerror:", + "runtimeerror:", + ] + return any(p in out for p in patterns) + + def _run_code_direct(self, language, code): + self.end_active_block() + self.active_block = CodeBlock() + self.active_block.language = language + self.active_block.code = code + self.active_block.refresh() + if language not in self.code_emplodes: + self.code_emplodes[language] = CodeEmplode(language, self.debug_mode) + ce = self.code_emplodes[language] + ce.active_block = self.active_block + ce.run() + output = self.active_block.output + self.active_block.end() + return output + + def _maybe_auto_install(self, language, code, output): + if not 
self.auto_install or language != 'python': + return False + text = output or "" + m = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", text) + if not m: + m = re.search(r"No module named ['\"]?([A-Za-z0-9_\-.]+)['\"]?", text) + if not m: + return False + pkg = m.group(1) + if pkg in self._install_attempted: + return False + self._install_attempted.add(pkg) + + install_cmd = f"python -m pip install -U {pkg} || python3 -m pip install -U {pkg}" + self._run_code_direct('shell', install_cmd) + new_out = self._run_code_direct(language, code) + self._auto_fix_or_finish(new_out) + return True + + def _auto_fix_or_finish(self, output): + if self._auto_fix_count >= self.max_auto_fixes: + return + if self._is_error_output(output): + self._auto_fix_count += 1 + self.messages.append({ + "role": "user", + "content": ( + "Execution failed. Here is the full output from your last run:\n\n" + + (output or "No output") + + "\n\nPlease fix the issue and try again using the run_code tool only." + ) + }) + self.respond() + def _execute_run_code(self, tool_name, raw_args): if tool_name != 'run_code': return @@ -356,7 +429,12 @@ def _execute_run_code(self, tool_name, raw_args): ce = self.code_emplodes[language] ce.active_block = self.active_block ce.run() + output = self.active_block.output self.active_block.end() + # Try auto-install then auto-fix + if self._maybe_auto_install(language, code, output): + return + self._auto_fix_or_finish(output) def respond(self): info = self.get_info_for_system_message()
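To make the recovery flow above concrete: the auto-install path looks for a `ModuleNotFoundError` in the captured output, tries one `pip install`, and re-runs the code once. A self-contained sketch, not the interpreter's own code; `run_python` is a hypothetical stand-in for `_run_code_direct`, while the regex, the pip invocation, and the `EMPLODE_AUTO_INSTALL` toggle mirror the patch:

```python
import os
import re
import subprocess
import sys

AUTO_INSTALL = os.getenv("EMPLODE_AUTO_INSTALL", "true").lower() in ("1", "true", "yes", "y")

def run_python(code: str) -> str:
    # Hypothetical stand-in for _run_code_direct: run the snippet, capture output.
    result = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True)
    return result.stdout + result.stderr

def run_with_auto_install(code: str) -> str:
    output = run_python(code)
    match = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", output)
    if match and AUTO_INSTALL:
        pkg = match.group(1)
        # One install attempt per missing module, then a single re-run.
        subprocess.run([sys.executable, "-m", "pip", "install", "-U", pkg], check=False)
        output = run_python(code)
    return output

print(run_with_auto_install("import requests; print(requests.__version__)"))
```

The companion `_auto_fix_or_finish` path feeds the failing output back to the model as a new user message and retries, capped by `EMPLODE_AUTO_FIX_LIMIT` (default 5).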