diff --git a/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch b/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch
new file mode 100644
index 0000000..ebfa187
--- /dev/null
+++ b/.capy/0001-Capy-jam-Standardize-on-GPT-5-and-remove-LiteLLM-to-.patch
@@ -0,0 +1,1184 @@
+From 183cb2bb5514b81eced89287006a47d8036a06fc Mon Sep 17 00:00:00 2001
+From: shouryamaanjain <jainshouryamaan@gmail.com>
+Date: Mon, 8 Sep 2025 07:09:53 +0000
+Subject: [PATCH] Capy jam: Standardize on GPT-5 and remove LiteLLM to simplify
+ model usage and future-proof the agent; update dependencies and docs
+ accordingly
+
+Co-authored-by: Capy <capy@capy.ai>
+---
+ README.md             |  40 +---
+ emplode/cli.py        | 104 ----------
+ emplode/emplode.py    | 446 +++++++-----------------------------------
+ emplode/get_hf_llm.py | 291 ---------------------------
+ pyproject.toml        |  26 +--
+ 5 files changed, 88 insertions(+), 819 deletions(-)
+ delete mode 100644 emplode/get_hf_llm.py
+
+diff --git a/README.md b/README.md
+index 12d18d3..bb8dfcc 100644
+--- a/README.md
++++ b/README.md
+@@ -10,7 +10,7 @@
+ 
+ <br>
+ 
+-**Emplode** Agent performs actions on your system by executing code locally, It can also serve as an agentic framework for your disposable sandbox projects. You can chat with Emplode in your terminal by running `emplode` after installing.
++**Emplode** performs actions on your system by executing code locally. You can chat with Emplode in your terminal by running `emplode` after installing.
+ 
+ This provides a natural-language interface to your system's general-purpose capabilities:
+ 
+@@ -46,35 +46,11 @@ emplode.chat() # Starts an interactive chat
+ 
+ ## Commands
+ 
+-### Change the Model
+-
+-For `gpt-3.5-turbo`, use fast mode:
+-
+-```shell
+-emplode --fast
+-```
+-
+-In Python, you will need to set the model manually:
+-
+-```python
+-emplode.model = "gpt-3.5-turbo"
+-```
+-
+-### Running Emplode locally
+-
+-You can run `emplode` in local mode from the command line to use `Code Llama`:
+-
+-```shell
+-emplode --local
+-```
+-
+-Or run any Hugging Face model **locally** by using its repo ID (e.g. "tiiuae/falcon-180B"):
+-
+-```shell
+-emplode --model nvidia/Llama-3.1-Nemotron-70B-Instruct
+-emplode --model meta-llama/Llama-3.2-11B-Vision-Instruct
+-```
++Emplode now uses a single model, `gpt-5`, everywhere. There is no model selection and no local model support.
+ 
++- `-y`, `--yes`: execute code without user confirmation
++- `-d`, `--debug`: prints extra information
++- `--version`: display current Emplode version
+ 
+ ### Configuration with .env
+ 
+@@ -84,15 +60,13 @@ Here's a sample .env configuration:
+ 
+ ```
+ EMPLODE_CLI_AUTO_RUN=False
+-EMPLODE_CLI_FAST_MODE=False
+-EMPLODE_CLI_LOCAL_RUN=False
+ EMPLODE_CLI_DEBUG=False
+ ```
+ 
+-You can modify these values in the .env file to change the default behavior of the Emplode
++You can modify these values in the .env file to change the default behavior of Emplode.
+ 
+ ## How Does it Work?
+ 
+-Emplode equips a [function-calling model](https://platform.openai.com/docs/guides/gpt/function-calling) with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run.
++Emplode equips a function-calling model with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run.
+ 
+ <br>
+diff --git a/emplode/cli.py b/emplode/cli.py
+index ad170d0..6b7c94a 100644
+--- a/emplode/cli.py
++++ b/emplode/cli.py
+@@ -6,7 +6,6 @@ from packaging import version
+ import pkg_resources
+ from rich import print as rprint
+ from rich.markdown import Markdown
+-import inquirer
+ 
+ load_dotenv()
+ 
+@@ -27,10 +26,7 @@ def cli(emplode):
+     pass
+ 
+   AUTO_RUN = os.getenv('EMPLODE_CLI_AUTO_RUN', 'False') == 'True'
+-  FAST_MODE = os.getenv('EMPLODE_CLI_FAST_MODE', 'False') == 'True'
+-  LOCAL_RUN = os.getenv('EMPLODE_CLI_LOCAL_RUN', 'False') == 'True'
+   DEBUG = os.getenv('EMPLODE_CLI_DEBUG', 'False') == 'True'
+-  USE_AZURE = os.getenv('EMPLODE_CLI_USE_AZURE', 'False') == 'True'
+ 
+   parser = argparse.ArgumentParser(description='Command Emplode.')
+   
+@@ -39,126 +35,26 @@ def cli(emplode):
+                       action='store_true',
+                       default=AUTO_RUN,
+                       help='execute code without user confirmation')
+-  parser.add_argument('-f',
+-                      '--fast',
+-                      action='store_true',
+-                      default=FAST_MODE,
+-                      help='use gpt-4o-mini instead of gpt-4o')
+-  parser.add_argument('-l',
+-                      '--local',
+-                      action='store_true',
+-                      default=LOCAL_RUN,
+-                      help='run fully local with code-llama')
+-  parser.add_argument(
+-                      '--falcon',
+-                      action='store_true',
+-                      default=False,
+-                      help='run fully local with falcon-40b')
+   parser.add_argument('-d',
+                       '--debug',
+                       action='store_true',
+                       default=DEBUG,
+                       help='prints extra information')
+   
+-  parser.add_argument('--model',
+-                      type=str,
+-                      help='model name (for OpenAI compatible APIs) or HuggingFace repo',
+-                      default="",
+-                      required=False)
+-  
+-  parser.add_argument('--max_tokens',
+-                      type=int,
+-                      help='max tokens generated (for locally run models)')
+-  parser.add_argument('--context_window',
+-                      type=int,
+-                      help='context window in tokens (for locally run models)')
+-  
+-  parser.add_argument('--api_base',
+-                      type=str,
+-                      help='change your api_base to any OpenAI compatible api',
+-                      default="",
+-                      required=False)
+-  
+-  parser.add_argument('--use-azure',
+-                      action='store_true',
+-                      default=USE_AZURE,
+-                      help='use Azure OpenAI Services')
+-  
+   parser.add_argument('--version',
+                       action='store_true',
+                       help='display current Emplode version')
+ 
+   args = parser.parse_args()
+ 
+-
+   if args.version:
+     print("Emplode", pkg_resources.get_distribution("emplode").version)
+     return
+ 
+-  if args.max_tokens:
+-    emplode.max_tokens = args.max_tokens
+-  if args.context_window:
+-    emplode.context_window = args.context_window
+-
+   if args.yes:
+     emplode.auto_run = True
+-  if args.fast:
+-    emplode.model = "gpt-4o-mini"
+-  if args.local and not args.falcon:
+-    
+-    rprint('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
+-        
+-    models = {
+-        '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
+-        '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
+-        '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
+-    }
+-    
+-    parameter_choices = list(models.keys())
+-    questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
+-    answers = inquirer.prompt(questions)
+-    chosen_param = answers['param']
+ 
+-    emplode.model = models[chosen_param]
+-    emplode.local = True
+-
+-  
+   if args.debug:
+     emplode.debug_mode = True
+-  if args.use_azure:
+-    emplode.use_azure = True
+-    emplode.local = False
+-
+-
+-  if args.model != "":
+-    emplode.model = args.model
+-
+-    if "/" in emplode.model:
+-      emplode.local = True
+-
+-  if args.api_base:
+-    emplode.api_base = args.api_base
+-
+-  if args.falcon or args.model == "tiiuae/falcon-180B":
+-    
+-    rprint('', Markdown("**Emplode** will use `Falcon` for local execution."), '')
+-        
+-    models = {
+-        '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
+-        '40B': 'YokaiKoibito/falcon-40b-GGUF',
+-        '180B': 'TheBloke/Falcon-180B-Chat-GGUF'
+-    }
+-    
+-    parameter_choices = list(models.keys())
+-    questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
+-    answers = inquirer.prompt(questions)
+-    chosen_param = answers['param']
+-
+-    if chosen_param == "180B":
+-      rprint(Markdown("> **WARNING:** To run `Falcon-180B` we recommend at least `100GB` of RAM."))
+-
+-    emplode.model = models[chosen_param]
+-    emplode.local = True
+-
+ 
+   emplode.chat()
+diff --git a/emplode/emplode.py b/emplode/emplode.py
+index f30176c..3daab29 100644
+--- a/emplode/emplode.py
++++ b/emplode/emplode.py
+@@ -3,17 +3,13 @@ from .utils import merge_deltas, parse_partial_json
+ from .message_block import MessageBlock
+ from .code_block import CodeBlock
+ from .code_emplode import CodeEmplode
+-from .get_hf_llm import get_hf_llm
+ 
+ import os
+ import time
+ import traceback
+ import json
+ import platform
+-import openai
+-import litellm
+-import pkg_resources
+-
++from openai import OpenAI
+ import getpass
+ import requests
+ import readline
+@@ -44,19 +40,7 @@ function_schema = {
+   },
+ }
+ 
+-missing_api_key_message = """> OpenAI API key not found
+-
+-To use `GPT-4o` (recommended) please provide an OpenAI API key.
+-
+-To use `Code-Llama` (free but less capable) press `enter`.
+-"""
+-
+-missing_azure_info_message = """> Azure OpenAI Service API info not found
+-
+-To use `GPT-4` (recommended) please provide an Azure OpenAI API key, a API base, a deployment name and a API version.
+-
+-To use `Code-Llama` (free but less capable) press `enter`.
+-"""
++missing_api_key_message = "> OpenAI API key not found\n\nTo use `GPT-5` please provide an OpenAI API key.\n"
+ 
+ confirm_mode_message = """
+ **Emplode** will require approval before running code. Use `emplode -y` to bypass this.
+@@ -72,17 +56,10 @@ class Emplode:
+     self.temperature = 0.001
+     self.api_key = None
+     self.auto_run = False
+-    self.local = False
+-    self.model = "gpt-4o"
++    self.model = "gpt-5"
+     self.debug_mode = False
+-    self.api_base = None 
+-    self.context_window = 2000 
++    self.context_window = 200000
+     self.max_tokens = 750
+-    self.use_azure = False
+-    self.azure_api_base = None
+-    self.azure_api_version = None
+-    self.azure_deployment_name = None
+-    self.azure_api_type = "azure"
+     here = os.path.abspath(os.path.dirname(__file__))
+     with open(os.path.join(here, 'system_message.txt'), 'r') as f:
+       self.system_message = f.read().strip()
+@@ -91,7 +68,7 @@ class Emplode:
+ 
+     self.active_block = None
+ 
+-    self.llama_instance = None
++    self.client = None
+ 
+   def cli(self):
+     cli(self)
+@@ -106,38 +83,33 @@ class Emplode:
+ 
+     info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}"
+ 
+-    if not self.local:
+-
+-      query = []
+-      for message in self.messages[-2:]:
+-        message_for_semantic_search = {"role": message["role"]}
+-        if "content" in message:
+-          message_for_semantic_search["content"] = message["content"]
+-        if "function_call" in message and "parsed_arguments" in message["function_call"]:
+-          message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"]
+-        query.append(message_for_semantic_search)
++    query = []
++    for message in self.messages[-2:]:
++      message_for_semantic_search = {"role": message.get("role", "assistant")}
++      if "content" in message:
++        message_for_semantic_search["content"] = message["content"]
++      if "function_call" in message and "parsed_arguments" in message["function_call"]:
++        message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"]
++      query.append(message_for_semantic_search)
+ 
+-      url = "https://open-procedures.replit.app/search/"
++    url = "https://open-procedures.replit.app/search/"
+ 
+-      try:
+-        relevant_procedures = requests.get(url, data=json.dumps(query)).json()["procedures"]
++    try:
++      relevant_procedures = requests.get(url, data=json.dumps(query)).json().get("procedures", [])
++      if relevant_procedures:
+         info += "\n\n# Recommended Procedures\n" + "\n---\n".join(relevant_procedures) + "\nIn your plan, include steps and, if present, **EXACT CODE SNIPPETS** (especially for depracation notices, **WRITE THEM INTO YOUR PLAN -- underneath each numbered step** as they will VANISH once you execute your first line of code, so WRITE THEM DOWN NOW if you need them) from the above procedures if they are relevant to the task. Again, include **VERBATIM CODE SNIPPETS** from the procedures above if they are relevent to the task **directly in your plan.**"
+-      except:
+-        pass
++    except:
++      pass
+ 
+-    elif self.local:
+-      info += "\n\nTo run code, write a fenced code block (i.e ```python, R or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output."
+     return info
+ 
+   def reset(self):
+-    
+     self.messages = []
+     self.code_emplodes = {}
+ 
+   def load(self, messages):
+     self.messages = messages
+ 
+-
+   def handle_undo(self, arguments):
+ 
+     if len(self.messages) == 0:
+@@ -159,7 +131,7 @@ class Emplode:
+       if 'content' in message and message['content'] != None:
+         print(Markdown(f"**Removed message:** `\"{message['content'][:30]}...\"`"))
+       elif 'function_call' in message:
+-        print(Markdown(f"**Removed codeblock**")) # TODO: Could add preview of code removed here.
++        print(Markdown(f"**Removed codeblock**"))
+     
+     print("") 
+   def handle_help(self, arguments):
+@@ -246,48 +218,17 @@ class Emplode:
+ 
+   def chat(self, message=None, return_messages=False):
+ 
+-    if not self.local:
+-      self.verify_api_key()
+-
+-    if self.local:
+-
+-      if self.llama_instance == None:
+-        try:
+-          self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
+-          if self.llama_instance == None:
+-            return
+-        except:
+-          traceback.print_exc()
+-
+-          print(Markdown("".join([
+-            f"> Failed to install `{self.model}`.",
+-            f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/emplodeai/emplode/\n```",
+-            f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**",
+-            "\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Emplode community Discord and consider contributing the project's development. )",
+-            "\n\nPress enter to switch to `GPT-4o` (recommended)."
+-          ])))
+-          input()
+-
+-          self.local = False
+-          self.model = "gpt-4o"
+-          self.verify_api_key()
++    self.verify_api_key()
+ 
+     welcome_message = ""
+ 
+     if self.debug_mode:
+       welcome_message += "> Entered debug mode"
+ 
+-    if not self.local and not self.auto_run:
+-
+-      if self.use_azure:
+-        notice_model = f"{self.azure_deployment_name} (Azure)"
+-      else:
+-        notice_model = f"{self.model.upper()}"
+-      welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To run locally, use `emplode --local`"
+-      
+-    if self.local:
+-      welcome_message += f"\n> Model set to `{self.model}`"
+-
++    if not self.auto_run:
++      notice_model = f"{self.model.upper()}"
++      welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To auto-run code, use `emplode -y`"
++    
+     if not self.auto_run:
+       welcome_message += "\n\n" + confirm_mode_message
+ 
+@@ -326,132 +267,34 @@ class Emplode:
+         except KeyboardInterrupt:
+           pass
+         finally:
+-      
+           self.end_active_block()
+ 
+     if return_messages:
+         return self.messages
+ 
+   def verify_api_key(self):
+-    if self.use_azure:
+-      all_env_available = (
+-        ('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and
+-        'AZURE_API_BASE' in os.environ and
+-        'AZURE_API_VERSION' in os.environ and
+-        'AZURE_DEPLOYMENT_NAME' in os.environ)
+-      if all_env_available:
+-        self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY']
+-        self.azure_api_base = os.environ['AZURE_API_BASE']
+-        self.azure_api_version = os.environ['AZURE_API_VERSION']
+-        self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME']
+-        self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure')
++    if self.api_key is None:
++      if 'OPENAI_API_KEY' in os.environ:
++        self.api_key = os.environ['OPENAI_API_KEY']
+       else:
+         self._print_welcome_message()
+         time.sleep(1)
+ 
+         print(Rule(style="white"))
+ 
+-        print(Markdown(missing_azure_info_message), '', Rule(style="white"), '')
+-        response = input("Azure OpenAI API key: ")
++        print(Markdown(missing_api_key_message), '', Rule(style="white"), '')
++        response = input("OpenAI API key: ")
+ 
+         if response == "":
+-
+-          print(Markdown(
+-            "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."),
+-                '')
+-          time.sleep(2)
+-          print(Rule(style="white"))
+-
+-          import inquirer
+-
+-          print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
+-
+-          models = {
+-              '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
+-              '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
+-              '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
+-          }
+-
+-          parameter_choices = list(models.keys())
+-          questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
+-          answers = inquirer.prompt(questions)
+-          chosen_param = answers['param']
+-
+-          self.model = models[chosen_param]
+-          self.local = True
+-
+-
+-
+-
+-          return
+-
++          raise Exception("OpenAI API key is required to use Emplode with GPT-5.")
+         else:
+           self.api_key = response
+-          self.azure_api_base = input("Azure OpenAI API base: ")
+-          self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ")
+-          self.azure_api_version = input("Azure OpenAI API version: ")
+-          print('', Markdown(
+-            "**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."),
+-                '')
++          print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '')
+           time.sleep(2)
+           print(Rule(style="white"))
+ 
+-      litellm.api_type = self.azure_api_type
+-      litellm.api_base = self.azure_api_base
+-      litellm.api_version = self.azure_api_version
+-      litellm.api_key = self.api_key
+-    else:
+-      if self.api_key == None:
+-        if 'OPENAI_API_KEY' in os.environ:
+-          self.api_key = os.environ['OPENAI_API_KEY']
+-        else:
+-          self._print_welcome_message()
+-          time.sleep(1)
+-
+-          print(Rule(style="white"))
+-
+-          print(Markdown(missing_api_key_message), '', Rule(style="white"), '')
+-          response = input("OpenAI API key: ")
+-
+-          if response == "":
+-
+-              print(Markdown(
+-                "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."),
+-                    '')
+-              time.sleep(2)
+-              print(Rule(style="white"))
+-
+-              import inquirer
+-
+-              print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
+-
+-              models = {
+-                  '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
+-                  '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
+-                  '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
+-              }
+-
+-              parameter_choices = list(models.keys())
+-              questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
+-              answers = inquirer.prompt(questions)
+-              chosen_param = answers['param']
+-              self.model = models[chosen_param]
+-              self.local = True
+-
+-
+-
+-
+-              return
+-
+-          else:
+-              self.api_key = response
+-              print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '')
+-              time.sleep(2)
+-              print(Rule(style="white"))
+-
+-      litellm.api_key = self.api_key
+-      if self.api_base:
+-        litellm.api_base = self.api_base
++    if self.client is None:
++      self.client = OpenAI(api_key=self.api_key)
+ 
+   def end_active_block(self):
+     if self.active_block:
+@@ -461,149 +304,51 @@ class Emplode:
+   def respond(self):
+     info = self.get_info_for_system_message()
+ 
+-    if self.local:
+-      self.system_message = "\n".join(self.system_message.split("\n")[:2])
+-      self.system_message += "\nOnly do what the user asks you to do, then ask what they'd like to do next."
+-
+     system_message = self.system_message + "\n\n" + info
+ 
+-    if self.local:
+-      messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message)
+-    else:
+-      messages = tt.trim(self.messages, self.model, system_message=system_message)
++    messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message)
+ 
+     if self.debug_mode:
+       print("\n", "Sending `messages` to LLM:", "\n")
+       print(messages)
+       print()
+ 
+-    if not self.local:
+-      
+-      error = ""
+-      
+-      for _ in range(3): 
+-        try:
+-
+-            if self.use_azure:
+-              response = litellm.completion(
+-                  f"azure/{self.azure_deployment_name}",
+-                  messages=messages,
+-                  functions=[function_schema],
+-                  temperature=self.temperature,
+-                  stream=True,
+-                  )
+-            else:
+-              if self.api_base:
+-                response = litellm.completion(
+-                  api_base=self.api_base,
+-                  model = "custom/" + self.model,
+-                  messages=messages,
+-                  functions=[function_schema],
+-                  stream=True,
+-                  temperature=self.temperature,
+-                )
+-              else:
+-                response = litellm.completion(
+-                  model=self.model,
+-                  messages=messages,
+-                  functions=[function_schema],
+-                  stream=True,
+-                  temperature=self.temperature,
+-                )
+-
+-            break
+-        except:
+-            if self.debug_mode:
+-              traceback.print_exc()
+-            error = traceback.format_exc()
+-            time.sleep(3)
+-      else:
+-        raise Exception(error)
+-            
+-    elif self.local:
+-
+-      def messages_to_prompt(messages):
+-
++    error = ""
+ 
+-        for message in messages:
+-          if "role" not in message:
+-            message["role"] = "assistant"
+-
+-        if "falcon" in self.model.lower():
+-
+-          formatted_messages = ""
+-          for message in messages:
+-            formatted_messages += f"{message['role'].capitalize()}: {message['content']}\n"
+-          formatted_messages = formatted_messages.strip()
+-
+-        else:
+-          
+-          system_prompt = messages[0]['content']
+-          formatted_messages = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"
+-
+-          for index, item in enumerate(messages[1:]):
+-              role = item['role']
+-              content = item['content']
+-
+-              if role == 'user':
+-                  formatted_messages += f"{content} [/INST] "
+-              elif role == 'function':
+-                  formatted_messages += f"Output: {content} [/INST] "
+-              elif role == 'assistant':
+-                  formatted_messages += f"{content} </s><s>[INST] "
+-
+-          if formatted_messages.endswith("<s>[INST] "):
+-              formatted_messages = formatted_messages[:-10]
+-
+-        return formatted_messages
+-
+-      prompt = messages_to_prompt(messages)
+-      if messages[-1]["role"] != "function":
+-        prompt += "Let's explore this. By the way, I can run code on your machine by writing the code in a markdown code block. This works for shell, javascript, python, R, and applescript. I'm going to try to do this for your task. Anyway, "
+-      elif messages[-1]["role"] == "function" and messages[-1]["content"] != "No output":
+-        prompt += "Given the output of the code I just ran, "
+-      elif messages[-1]["role"] == "function" and messages[-1]["content"] == "No output":
+-        prompt += "Given the fact that the code I just ran produced no output, "
+-
+-
+-      if self.debug_mode:
+-        import builtins
+-        builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt)
+-
+-      response = self.llama_instance(
+-        prompt,
+-        stream=True,
+-        temperature=self.temperature,
+-        stop=["</s>"],
+-        max_tokens=750 
+-      )
++    for _ in range(3): 
++      try:
++        response = self.client.chat.completions.create(
++          model=self.model,
++          messages=messages,
++          functions=[function_schema],
++          temperature=self.temperature,
++          stream=True,
++        )
++        break
++      except:
++        if self.debug_mode:
++          traceback.print_exc()
++        error = traceback.format_exc()
++        time.sleep(3)
++    else:
++      raise Exception(error)
+ 
+     self.messages.append({})
+     in_function_call = False
+-    llama_function_call_finished = False
+     self.active_block = None
+ 
+     for chunk in response:
+-      if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0):
+-        continue
+-
+-      if self.local:
+-        if "content" not in messages[-1]:
+-          chunk["choices"][0]["text"] = chunk["choices"][0]["text"].capitalize()
+-          messages[-1]["role"] = "assistant"
+-        delta = {"content": chunk["choices"][0]["text"]}
+-      else:
+-        delta = chunk["choices"][0]["delta"]
++      try:
++        chunk_dict = chunk.model_dump()
++      except Exception:
++        chunk_dict = chunk
++
++      delta = chunk_dict.get("choices", [{}])[0].get("delta", {})
++      finish_reason = chunk_dict.get("choices", [{}])[0].get("finish_reason")
+ 
+       self.messages[-1] = merge_deltas(self.messages[-1], delta)
+ 
+-      if not self.local:
+-        condition = "function_call" in self.messages[-1]
+-      elif self.local:
+-        if "content" in self.messages[-1]:
+-          condition = self.messages[-1]["content"].count("```") % 2 == 1
+-        else:
+-          condition = False
++      condition = "function_call" in self.messages[-1]
+ 
+       if condition:
+         if in_function_call == False:
+@@ -618,68 +363,24 @@ class Emplode:
+ 
+         in_function_call = True
+ 
+-        if not self.local:
+-          if "arguments" in self.messages[-1]["function_call"]:
+-            arguments = self.messages[-1]["function_call"]["arguments"]
+-            new_parsed_arguments = parse_partial_json(arguments)
+-            if new_parsed_arguments:
+-              self.messages[-1]["function_call"][
+-                "parsed_arguments"] = new_parsed_arguments
+-
+-        elif self.local:
+-          if "content" in self.messages[-1]:
+-
+-            content = self.messages[-1]["content"]
+-
+-            if "```" in content:
+-              blocks = content.split("```")
+-
+-              current_code_block = blocks[-1]
+-
+-              lines = current_code_block.split("\n")
+-
+-              if content.strip() == "```": 
+-                language = None
+-              else:
+-                if lines[0] != "":
+-                  language = lines[0].strip()
+-                else:
+-                  language = "python"
+-                  if len(lines) > 1:
+-                    if lines[1].startswith("pip"):
+-                      language = "shell"
+-
+-              code = '\n'.join(lines[1:]).strip("` \n")
+-
+-              arguments = {"code": code}
+-              if language: 
+-                if language == "bash":
+-                  language = "shell"
+-                arguments["language"] = language
+-
+-            if "function_call" not in self.messages[-1]:
+-              self.messages[-1]["function_call"] = {}
+-
+-            self.messages[-1]["function_call"]["parsed_arguments"] = arguments
++        if "arguments" in self.messages[-1]["function_call"]:
++          arguments = self.messages[-1]["function_call"]["arguments"]
++          new_parsed_arguments = parse_partial_json(arguments)
++          if new_parsed_arguments:
++            self.messages[-1]["function_call"][
++              "parsed_arguments"] = new_parsed_arguments
+ 
+       else:
+         if in_function_call == True:
+-
+-          if self.local:
+-          
+-            llama_function_call_finished = True
+-
+-        in_function_call = False
++          in_function_call = False
+ 
+         if self.active_block == None:
+-
+           self.active_block = MessageBlock()
+ 
+       self.active_block.update_from_message(self.messages[-1])
+ 
+-      if chunk["choices"][0]["finish_reason"] or llama_function_call_finished:
+-        if chunk["choices"][
+-            0]["finish_reason"] == "function_call" or llama_function_call_finished:
++      if finish_reason:
++        if finish_reason == "function_call":
+ 
+           if self.debug_mode:
+             print("Running function:")
+@@ -712,7 +413,7 @@ class Emplode:
+               })
+               return
+ 
+-          if not self.local and "parsed_arguments" not in self.messages[-1]["function_call"]:
++          if "parsed_arguments" not in self.messages[-1]["function_call"]:
+ 
+             self.messages.append({
+               "role": "function",
+@@ -742,9 +443,8 @@ class Emplode:
+ 
+           self.respond()
+ 
+-        if chunk["choices"][0]["finish_reason"] != "function_call":
+-
+-          if self.local and "content" in self.messages[-1]:
++        else:
++          if "content" in self.messages[-1]:
+             self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#")
+             self.active_block.update_from_message(self.messages[-1])
+             time.sleep(0.1)
+diff --git a/emplode/get_hf_llm.py b/emplode/get_hf_llm.py
+deleted file mode 100644
+index a93b02e..0000000
+--- a/emplode/get_hf_llm.py
++++ /dev/null
+@@ -1,291 +0,0 @@
+-import os
+-import sys
+-import appdirs
+-import traceback
+-import inquirer
+-import subprocess
+-from rich import print
+-from rich.markdown import Markdown
+-import os
+-import shutil
+-from huggingface_hub import list_files_info, hf_hub_download
+-
+-
+-def get_hf_llm(repo_id, debug_mode, context_window):
+-
+-    if "TheBloke/CodeLlama-" not in repo_id:
+-      print('', Markdown(f"**Emplode** will use `{repo_id}` for local execution."), '')
+-
+-    raw_models = list_gguf_files(repo_id)
+-    
+-    if not raw_models:
+-        print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?")
+-        return None
+-
+-    combined_models = group_and_combine_splits(raw_models)
+-
+-    selected_model = None
+-
+-    if len(combined_models) > 3:
+-
+-        choices = [
+-            format_quality_choice(combined_models[0], "Small"),
+-            format_quality_choice(combined_models[len(combined_models) // 2], "Medium"),
+-            format_quality_choice(combined_models[-1], "Large"),
+-            "See More"
+-        ]
+-        questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
+-        answers = inquirer.prompt(questions)
+-        if answers["selected_model"].startswith("Small"):
+-            selected_model = combined_models[0]["filename"]
+-        elif answers["selected_model"].startswith("Medium"):
+-            selected_model = combined_models[len(combined_models) // 2]["filename"]
+-        elif answers["selected_model"].startswith("Large"):
+-            selected_model = combined_models[-1]["filename"]
+-    
+-    if selected_model == None:
+-      
+-        choices = [format_quality_choice(model) for model in combined_models]
+-        questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
+-        answers = inquirer.prompt(questions)
+-        for model in combined_models:
+-            if format_quality_choice(model) == answers["selected_model"]:
+-                selected_model = model["filename"]
+-                break
+-
+-    if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
+-      n_gpu_layers = -1
+-    else:
+-      n_gpu_layers = 0
+-
+-    user_data_dir = appdirs.user_data_dir("Emplode")
+-    default_path = os.path.join(user_data_dir, "models")
+-
+-    os.makedirs(default_path, exist_ok=True)
+-
+-    directories_to_check = [
+-        default_path,
+-        "llama.cpp/models/",
+-        os.path.expanduser("~") + "/llama.cpp/models/",
+-        "/"
+-    ]
+-
+-    for directory in directories_to_check:
+-        path = os.path.join(directory, selected_model)
+-        if os.path.exists(path):
+-            model_path = path
+-            break
+-    else:
+-        download_path = os.path.join(default_path, selected_model)
+-      
+-        print(f"This language model was not found on your system.\n\nDownload to `{default_path}`?", "")
+-        if confirm_action(""):
+-            for model_details in combined_models:
+-                if model_details["filename"] == selected_model:
+-                    selected_model_details = model_details
+-
+-                    if not enough_disk_space(selected_model_details['Size'], default_path):
+-                        print(f"You do not have enough disk space available to download this model.")
+-                        return None
+-
+-            split_files = [model["filename"] for model in raw_models if selected_model in model["filename"]]
+-            
+-            if len(split_files) > 1:
+-                for split_file in split_files:
+-                    split_path = os.path.join(default_path, split_file)
+-                    if os.path.exists(split_path):
+-                        if not confirm_action(f"Split file {split_path} already exists. Download again?"):
+-                            continue
+-                    hf_hub_download(
+-                        repo_id=repo_id,
+-                        filename=split_file,
+-                        local_dir=default_path,
+-                        local_dir_use_symlinks=False,
+-                        resume_download=True)
+-        
+-                actually_combine_files(default_path, selected_model, split_files)
+-            else:
+-                hf_hub_download(
+-                    repo_id=repo_id,
+-                    filename=selected_model,
+-                    local_dir=default_path,
+-                    local_dir_use_symlinks=False,
+-                    resume_download=True)
+-
+-            model_path = download_path
+-        
+-        else:
+-            print('\n', "Download cancelled. Exiting.", '\n')
+-            return None
+-
+-    print(Markdown(f"Model found at `{model_path}`"))
+-  
+-    try:
+-        from llama_cpp import Llama
+-    except:
+-        if debug_mode:
+-            traceback.print_exc()
+-        message = "Local LLM interface package not found. Install `llama-cpp-python`?"
+-        if confirm_action(message):
+-    
+-            import platform
+-            
+-            def check_command(command):
+-                try:
+-                    subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+-                    return True
+-                except subprocess.CalledProcessError:
+-                    return False
+-                except FileNotFoundError:
+-                    return False
+-            
+-            def install_llama(backend):
+-                env_vars = {
+-                    "FORCE_CMAKE": "1"
+-                }
+-                
+-                if backend == "cuBLAS":
+-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
+-                elif backend == "hipBLAS":
+-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on"
+-                elif backend == "Metal":
+-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on"
+-                else: 
+-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
+-                
+-                try:
+-                    subprocess.run([sys.executable, "-m", "pip", "install", "llama-cpp-python"], env={**os.environ, **env_vars}, check=True)
+-                except subprocess.CalledProcessError as e:
+-                    print(f"Error during installation with {backend}: {e}")
+-            
+-            def supports_metal():
+-                if platform.system() == "Darwin":
+-                    mac_version = tuple(map(int, platform.mac_ver()[0].split('.')))
+-                    if mac_version >= (10, 11):
+-                        return True
+-                return False
+-        
+-            if check_command(["nvidia-smi"]):
+-                install_llama("cuBLAS")
+-            elif check_command(["rocminfo"]):
+-                install_llama("hipBLAS")
+-            elif supports_metal():
+-                install_llama("Metal")
+-            else:
+-                install_llama("OpenBLAS")
+-          
+-            from llama_cpp import Llama
+-            print('', Markdown("Finished downloading `Code-Llama` interface."), '')
+-
+-            if platform.system() == "Darwin":
+-                if platform.machine() != "arm64":
+-                    print("Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture.")
+-                    print("The llama.ccp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac.")
+-                    print("\nTo install the correct version of Python that supports 'arm64' architecture:")
+-                    print("1. Download Miniforge for M1/M2:")
+-                    print("wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh")
+-                    print("2. Install it:")
+-                    print("bash Miniforge3-MacOSX-arm64.sh")
+-                    print("")
+-      
+-        else:
+-            print('', "Installation cancelled. Exiting.", '')
+-            return None
+-        
+-    assert os.path.isfile(model_path)
+-    llama_2 = Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
+-      
+-    return llama_2
+-
+-def confirm_action(message):
+-    question = [
+-        inquirer.Confirm('confirm',
+-                         message=message,
+-                         default=True),
+-    ]
+-
+-    answers = inquirer.prompt(question)
+-    return answers['confirm']
+-
+-
+-import os
+-import inquirer
+-from huggingface_hub import list_files_info, hf_hub_download, login
+-from typing import Dict, List, Union
+-
+-def list_gguf_files(repo_id: str) -> List[Dict[str, Union[str, float]]]:
+-    try:
+-      files_info = list_files_info(repo_id=repo_id)
+-    except Exception as e:
+-      if "authentication" in str(e).lower():
+-        print("You likely need to be logged in to HuggingFace to access this language model.")
+-        print(f"Visit this URL to log in and apply for access to this language model: https://huggingface.co/{repo_id}")
+-        print("Then, log in here:")
+-        login()
+-        files_info = list_files_info(repo_id=repo_id)
+-  
+-    gguf_files = [file for file in files_info if "gguf" in file.rfilename]
+-
+-    gguf_files = sorted(gguf_files, key=lambda x: x.size)
+-
+-    result = []
+-    for file in gguf_files:
+-        size_in_gb = file.size / (1024**3)
+-        filename = file.rfilename
+-        result.append({
+-            "filename": filename,
+-            "Size": size_in_gb,
+-            "RAM": size_in_gb + 2.5,
+-        })
+-
+-    return result
+-
+-from typing import List, Dict, Union
+-
+-def group_and_combine_splits(models: List[Dict[str, Union[str, float]]]) -> List[Dict[str, Union[str, float]]]:
+-    grouped_files = {}
+-
+-    for model in models:
+-        base_name = model["filename"].split('-split-')[0]
+-        
+-        if base_name in grouped_files:
+-            grouped_files[base_name]["Size"] += model["Size"]
+-            grouped_files[base_name]["RAM"] += model["RAM"]
+-            grouped_files[base_name]["SPLITS"].append(model["filename"])
+-        else:
+-            grouped_files[base_name] = {
+-                "filename": base_name,
+-                "Size": model["Size"],
+-                "RAM": model["RAM"],
+-                "SPLITS": [model["filename"]]
+-            }
+-
+-    return list(grouped_files.values())
+-
+-
+-def actually_combine_files(default_path: str, base_name: str, files: List[str]) -> None:
+-    files.sort()    
+-    base_path = os.path.join(default_path, base_name)
+-    with open(base_path, 'wb') as outfile:
+-        for file in files:
+-            file_path = os.path.join(default_path, file)
+-            with open(file_path, 'rb') as infile:
+-                outfile.write(infile.read())
+-            os.remove(file_path)
+-
+-def format_quality_choice(model, name_override = None) -> str:
+-    if name_override:
+-        name = name_override
+-    else:
+-        name = model['filename']
+-    return f"{name} | Size: {model['Size']:.1f} GB, Estimated RAM usage: {model['RAM']:.1f} GB"
+-
+-def enough_disk_space(size, path) -> bool:
+-    _, _, free = shutil.disk_usage(path)
+-
+-    free_gb = free / (2**30) 
+-
+-    if free_gb > size:
+-        return True
+-
+-    return False
+diff --git a/pyproject.toml b/pyproject.toml
+index 7203053..1dfcc8f 100644
+--- a/pyproject.toml
++++ b/pyproject.toml
+@@ -10,26 +10,16 @@ readme = "README.md"
+ 
+ [tool.poetry.dependencies]
+ python = "^3.10"
+-openai = "^0.27.8"
+-rich = "^13.4.2"
+-tiktoken = "^0.4.0"
+-astor = "^0.8.1"
+-git-python = "^1.0.3"
+-tokentrim = "^0.1.9"
+-appdirs = "^1.4.4"
+-six = "^1.16.0"
+-python-dotenv = "^1.0.0"
+-
+-inquirer = "^3.1.3"
+-wget = "^3.2"
+-huggingface-hub = "^0.16.4"
+-litellm = "^0.1.590"
+-[tool.poetry.dependencies.pyreadline3]
+-version = "^3.4.1"
+-markers = "sys_platform == 'win32'"
++openai = "^1.0.0"
++rich = "*"
++tiktoken = "*"
++tokentrim = "*"
++python-dotenv = "*"
++requests = "*"
++packaging = "*"
+ 
+ [tool.poetry.group.dev.dependencies]
+-pytest = "^7.4.0"
++pytest = "*"
+ 
+ [build-system]
+ requires = ["poetry-core>=1.0.0"]
+-- 
+2.30.2
+
diff --git a/.capy/pr-body-gpt5-only.md b/.capy/pr-body-gpt5-only.md
new file mode 100644
index 0000000..91a65fd
--- /dev/null
+++ b/.capy/pr-body-gpt5-only.md
@@ -0,0 +1,31 @@
+Title: Migrate Emplode to GPT-5 only, remove LiteLLM, and update deps
+
+Summary
+- Standardize the project on a single LLM: GPT-5. Removed all alternative model paths (LiteLLM, Azure/OpenAI switches, local HuggingFace/Code Llama, Falcon) and simplified CLI and runtime accordingly.
+- Replace LiteLLM with the official OpenAI client and streaming chat completions; refactor message streaming to keep existing UX (MessageBlock/CodeBlock) intact.
+- Update Poetry dependencies to latest-compatible constraints and remove unused libs; refresh README to match the new, simplified flow.
+
+Details
+- Core:
+  - Default model set to `gpt-5`; all model selection flags and local/azure code paths removed.
+  - Switched from LiteLLM to `openai` client (>=1.x) with streaming + function-calling (`run_code`).
+  - Unified message trimming to a token-window approach to avoid model-name coupling.
+- CLI:
+  - Simplified to only `--yes`, `--debug`, and `--version`.
+  - Removed `--fast`, `--local`, `--falcon`, `--model`, `--api_base`, and `--use-azure`.
+- Deps:
+  - Removed: `litellm`, `huggingface-hub`, `inquirer`, `appdirs`, `wget`, `six`, `git-python`, `astor`.
+  - Added: `requests`, `packaging`. Updated constraints for `openai`, `rich`, `tiktoken`, `tokentrim`, `python-dotenv`.
+- Docs:
+  - README now states GPT-5-only usage and the new CLI flags; removed local/HF and fast-mode sections.
+
+Impact
+- Leaner, single-path runtime; fewer moving parts and less configuration.
+- No more local/HF model downloads or Azure branching; requires only `OPENAI_API_KEY`.
+- Dependency surface reduced; easier to maintain and upgrade going forward.
+
+Notes
+- This change removes all alternate model support intentionally per request; if a compatibility shim is desired (e.g., token counting for unknown models), we can add that in a follow-up.
+
+
+₍ᐢ•(ܫ)•ᐢ₎ Generated by [Capy](https://capy.ai) ([view task](https://capy.ai/project/5719ac6b-84af-11f0-a94e-3eef481a796b/task/768ae859-543c-481f-b4a9-514f56c81a6f))
\ No newline at end of file
diff --git a/README.md b/README.md
index 12d18d3..808f581 100644
--- a/README.md
+++ b/README.md
@@ -1,98 +1,28 @@
-<h1 align="center">/Emplode.</h1>
+<h1 align="center">Emplode</h1>
 
-<p align="center">
-    <a href="https://discord.gg/uZmvdFpSyW">
-        <img alt="Discord" src="https://img.shields.io/discord/1172527582684651600?logo=discord&style=flat&logoColor=white"/>
-    </a>
-    <br><br>
-    <b>Agent that performs action on your system by executing code.</b>
-</p>
-
-<br>
-
-**Emplode** Agent performs actions on your system by executing code locally, It can also serve as an agentic framework for your disposable sandbox projects. You can chat with Emplode in your terminal by running `emplode` after installing.
-
-This provides a natural-language interface to your system's general-purpose capabilities:
-
-- Create, edit and arrange files.
-- Control a browser to perform research
-- Plot, clean, and analyze large datasets
-- ...etc.
-
-<br>
+Simple terminal agent that executes code on your machine.
 
 ## Quick Start
 
 ```shell
 pip install emplode
-```
-
-### Terminal
-
-After installation, simply run `emplode`:
-
-```shell
 emplode
 ```
 
-### Python
+## Python
 
 ```python
 import emplode
-
-emplode.chat("Organize all images in my downloads folder into subfolders by year, naming each folder after the year.") # Executes a single command
-emplode.chat() # Starts an interactive chat
-```
-
-## Commands
-
-### Change the Model
-
-For `gpt-3.5-turbo`, use fast mode:
-
-```shell
-emplode --fast
-```
-
-In Python, you will need to set the model manually:
-
-```python
-emplode.model = "gpt-3.5-turbo"
-```
-
-### Running Emplode locally
-
-You can run `emplode` in local mode from the command line to use `Code Llama`:
-
-```shell
-emplode --local
-```
-
-Or run any Hugging Face model **locally** by using its repo ID (e.g. "tiiuae/falcon-180B"):
-
-```shell
-emplode --model nvidia/Llama-3.1-Nemotron-70B-Instruct
-emplode --model meta-llama/Llama-3.2-11B-Vision-Instruct
+emplode.chat("Organize my downloads by year.")
+emplode.chat()
 ```
 
+## CLI
 
-### Configuration with .env
-
-Emplode allows you to set default behaviors using a .env file. This provides a flexible way to configure it without changing command-line arguments every time.
-
-Here's a sample .env configuration:
-
-```
-EMPLODE_CLI_AUTO_RUN=False
-EMPLODE_CLI_FAST_MODE=False
-EMPLODE_CLI_LOCAL_RUN=False
-EMPLODE_CLI_DEBUG=False
-```
-
-You can modify these values in the .env file to change the default behavior of the Emplode
+Only one flag is supported:
 
-## How Does it Work?
+- `-y` / `--yes`: run code without asking for confirmation.
 
-Emplode equips a [function-calling model](https://platform.openai.com/docs/guides/gpt/function-calling) with an `exec()` function, which accepts a `language` (like "Python" or "JavaScript") and `code` to run.
+## How it works
 
-<br>
+Emplode uses a function-calling model (gpt-5) with a single function `run_code(language, code)`. When the model calls the function, the code is executed locally and the output is returned to the model.
diff --git a/emplode/cli.py b/emplode/cli.py
index ad170d0..640fb0d 100644
--- a/emplode/cli.py
+++ b/emplode/cli.py
@@ -1,164 +1,11 @@
 import argparse
-import os
-from dotenv import load_dotenv
-import requests
-from packaging import version
-import pkg_resources
-from rich import print as rprint
-from rich.markdown import Markdown
-import inquirer
-
-load_dotenv()
-
-def check_for_update():
-    response = requests.get(f'https://pypi.org/pypi/emplode/json')
-    latest_version = response.json()['info']['version']
-
-    current_version = pkg_resources.get_distribution("emplode").version
-
-    return version.parse(latest_version) > version.parse(current_version)
 
 def cli(emplode):
-
-  try:
-    if check_for_update():
-      print("A new version is available. Please run 'pip install --upgrade emplode'.")
-  except:
-    pass
-
-  AUTO_RUN = os.getenv('EMPLODE_CLI_AUTO_RUN', 'False') == 'True'
-  FAST_MODE = os.getenv('EMPLODE_CLI_FAST_MODE', 'False') == 'True'
-  LOCAL_RUN = os.getenv('EMPLODE_CLI_LOCAL_RUN', 'False') == 'True'
-  DEBUG = os.getenv('EMPLODE_CLI_DEBUG', 'False') == 'True'
-  USE_AZURE = os.getenv('EMPLODE_CLI_USE_AZURE', 'False') == 'True'
-
-  parser = argparse.ArgumentParser(description='Command Emplode.')
-  
-  parser.add_argument('-y',
-                      '--yes',
-                      action='store_true',
-                      default=AUTO_RUN,
-                      help='execute code without user confirmation')
-  parser.add_argument('-f',
-                      '--fast',
-                      action='store_true',
-                      default=FAST_MODE,
-                      help='use gpt-4o-mini instead of gpt-4o')
-  parser.add_argument('-l',
-                      '--local',
-                      action='store_true',
-                      default=LOCAL_RUN,
-                      help='run fully local with code-llama')
-  parser.add_argument(
-                      '--falcon',
-                      action='store_true',
-                      default=False,
-                      help='run fully local with falcon-40b')
-  parser.add_argument('-d',
-                      '--debug',
-                      action='store_true',
-                      default=DEBUG,
-                      help='prints extra information')
-  
-  parser.add_argument('--model',
-                      type=str,
-                      help='model name (for OpenAI compatible APIs) or HuggingFace repo',
-                      default="",
-                      required=False)
-  
-  parser.add_argument('--max_tokens',
-                      type=int,
-                      help='max tokens generated (for locally run models)')
-  parser.add_argument('--context_window',
-                      type=int,
-                      help='context window in tokens (for locally run models)')
-  
-  parser.add_argument('--api_base',
-                      type=str,
-                      help='change your api_base to any OpenAI compatible api',
-                      default="",
-                      required=False)
-  
-  parser.add_argument('--use-azure',
-                      action='store_true',
-                      default=USE_AZURE,
-                      help='use Azure OpenAI Services')
-  
-  parser.add_argument('--version',
-                      action='store_true',
-                      help='display current Emplode version')
-
+  parser = argparse.ArgumentParser(description='Emplode')
+  parser.add_argument('-y', '--yes', action='store_true', help='execute code without confirmation')
   args = parser.parse_args()
 
-
-  if args.version:
-    print("Emplode", pkg_resources.get_distribution("emplode").version)
-    return
-
-  if args.max_tokens:
-    emplode.max_tokens = args.max_tokens
-  if args.context_window:
-    emplode.context_window = args.context_window
-
   if args.yes:
     emplode.auto_run = True
-  if args.fast:
-    emplode.model = "gpt-4o-mini"
-  if args.local and not args.falcon:
-    
-    rprint('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
-        
-    models = {
-        '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
-        '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
-        '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
-    }
-    
-    parameter_choices = list(models.keys())
-    questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
-    answers = inquirer.prompt(questions)
-    chosen_param = answers['param']
-
-    emplode.model = models[chosen_param]
-    emplode.local = True
-
-  
-  if args.debug:
-    emplode.debug_mode = True
-  if args.use_azure:
-    emplode.use_azure = True
-    emplode.local = False
-
-
-  if args.model != "":
-    emplode.model = args.model
-
-    if "/" in emplode.model:
-      emplode.local = True
-
-  if args.api_base:
-    emplode.api_base = args.api_base
-
-  if args.falcon or args.model == "tiiuae/falcon-180B":
-    
-    rprint('', Markdown("**Emplode** will use `Falcon` for local execution."), '')
-        
-    models = {
-        '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
-        '40B': 'YokaiKoibito/falcon-40b-GGUF',
-        '180B': 'TheBloke/Falcon-180B-Chat-GGUF'
-    }
-    
-    parameter_choices = list(models.keys())
-    questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
-    answers = inquirer.prompt(questions)
-    chosen_param = answers['param']
-
-    if chosen_param == "180B":
-      rprint(Markdown("> **WARNING:** To run `Falcon-180B` we recommend at least `100GB` of RAM."))
-
-    emplode.model = models[chosen_param]
-    emplode.local = True
-
 
   emplode.chat()
diff --git a/emplode/emplode.py b/emplode/emplode.py
index f30176c..54dbd72 100644
--- a/emplode/emplode.py
+++ b/emplode/emplode.py
@@ -3,36 +3,33 @@
 from .message_block import MessageBlock
 from .code_block import CodeBlock
 from .code_emplode import CodeEmplode
-from .get_hf_llm import get_hf_llm
 
 import os
 import time
 import traceback
 import json
 import platform
-import openai
-import litellm
-import pkg_resources
-
+import re
+from openai import OpenAI
+from openai import BadRequestError
 import getpass
-import requests
 import readline
 import tokentrim as tt
 from rich import print
 from rich.markdown import Markdown
 from rich.rule import Rule
 
-function_schema = {
+# Responses API tool definition for function-calling (strict JSON Schema)
+RUN_CODE_TOOL = {
+  "type": "function",
   "name": "run_code",
-  "description":
-  "Executes code on the user's machine and returns the output",
+  "description": "Executes code on the user's machine and returns the output",
   "parameters": {
     "type": "object",
     "properties": {
       "language": {
         "type": "string",
-        "description":
-        "The programming language",
+        "description": "The programming language",
         "enum": ["python", "R", "shell", "applescript", "javascript", "html"]
       },
       "code": {
@@ -40,28 +37,15 @@
         "description": "The code to execute"
       }
     },
-    "required": ["language", "code"]
-  },
+    "required": ["language", "code"],
+    "additionalProperties": False
+  }
 }
 
-missing_api_key_message = """> OpenAI API key not found
-
-To use `GPT-4o` (recommended) please provide an OpenAI API key.
-
-To use `Code-Llama` (free but less capable) press `enter`.
-"""
-
-missing_azure_info_message = """> Azure OpenAI Service API info not found
-
-To use `GPT-4` (recommended) please provide an Azure OpenAI API key, a API base, a deployment name and a API version.
-
-To use `Code-Llama` (free but less capable) press `enter`.
-"""
+missing_api_key_message = "> OpenAI API key not found. Provide an OpenAI API key to continue.\n"
 
 confirm_mode_message = """
-**Emplode** will require approval before running code. Use `emplode -y` to bypass this.
-
-Press `CTRL-C` to exit.
+Emplode will require approval before running code. Use `emplode -y` to bypass this.
 """
 
 
@@ -69,135 +53,83 @@ class Emplode:
 
   def __init__(self):
     self.messages = []
-    self.temperature = 0.001
     self.api_key = None
     self.auto_run = False
-    self.local = False
-    self.model = "gpt-4o"
+    self.model = "gpt-5"
     self.debug_mode = False
-    self.api_base = None 
-    self.context_window = 2000 
+    self.context_window = 200000
     self.max_tokens = 750
-    self.use_azure = False
-    self.azure_api_base = None
-    self.azure_api_version = None
-    self.azure_deployment_name = None
-    self.azure_api_type = "azure"
+    self.max_auto_fixes = int(os.getenv('EMPLODE_AUTO_FIX_LIMIT', '5'))
+    self._auto_fix_count = 0
+    self.auto_install = os.getenv('EMPLODE_AUTO_INSTALL', 'true').lower() in ('1','true','yes','y')
+    self._install_attempted = set()
     here = os.path.abspath(os.path.dirname(__file__))
     with open(os.path.join(here, 'system_message.txt'), 'r') as f:
       self.system_message = f.read().strip()
 
     self.code_emplodes = {}
-
     self.active_block = None
-
-    self.llama_instance = None
+    self.client = None
 
   def cli(self):
     cli(self)
 
   def get_info_for_system_message(self):
-
-    info = ""
-
     username = getpass.getuser()
-    current_working_directory = os.getcwd()
-    operating_system = platform.system()
-
-    info += f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}"
-
-    if not self.local:
-
-      query = []
-      for message in self.messages[-2:]:
-        message_for_semantic_search = {"role": message["role"]}
-        if "content" in message:
-          message_for_semantic_search["content"] = message["content"]
-        if "function_call" in message and "parsed_arguments" in message["function_call"]:
-          message_for_semantic_search["function_call"] = message["function_call"]["parsed_arguments"]
-        query.append(message_for_semantic_search)
-
-      url = "https://open-procedures.replit.app/search/"
-
-      try:
-        relevant_procedures = requests.get(url, data=json.dumps(query)).json()["procedures"]
-        info += "\n\n# Recommended Procedures\n" + "\n---\n".join(relevant_procedures) + "\nIn your plan, include steps and, if present, **EXACT CODE SNIPPETS** (especially for depracation notices, **WRITE THEM INTO YOUR PLAN -- underneath each numbered step** as they will VANISH once you execute your first line of code, so WRITE THEM DOWN NOW if you need them) from the above procedures if they are relevant to the task. Again, include **VERBATIM CODE SNIPPETS** from the procedures above if they are relevent to the task **directly in your plan.**"
-      except:
-        pass
-
-    elif self.local:
-      info += "\n\nTo run code, write a fenced code block (i.e ```python, R or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output."
-    return info
+    cwd = os.getcwd()
+    os_name = platform.system()
+    return f"[User Info]\nName: {username}\nCWD: {cwd}\nOS: {os_name}"
 
   def reset(self):
-    
     self.messages = []
     self.code_emplodes = {}
 
   def load(self, messages):
     self.messages = messages
 
-
   def handle_undo(self, arguments):
-
     if len(self.messages) == 0:
       return
     last_user_index = None
     for i, message in enumerate(self.messages):
-        if message.get('role') == 'user':
-            last_user_index = i
-
-    removed_messages = []
-
+      if message.get('role') == 'user':
+        last_user_index = i
+    removed = []
     if last_user_index is not None:
-        removed_messages = self.messages[last_user_index:]
-        self.messages = self.messages[:last_user_index]
-
-    print("") 
-
-    for message in removed_messages:
-      if 'content' in message and message['content'] != None:
-        print(Markdown(f"**Removed message:** `\"{message['content'][:30]}...\"`"))
-      elif 'function_call' in message:
-        print(Markdown(f"**Removed codeblock**")) # TODO: Could add preview of code removed here.
-    
-    print("") 
+      removed = self.messages[last_user_index:]
+      self.messages = self.messages[:last_user_index]
+    print("")
+    for m in removed:
+      if 'content' in m and m['content'] is not None:
+        print(Markdown(f"**Removed message:** `\"{m['content'][:30]}...\"`"))
+      elif 'function_call' in m:
+        print(Markdown("**Removed codeblock**"))
+    print("")
+
   def handle_help(self, arguments):
-    commands_description = {
-      "%debug [true/false]": "Toggle debug mode. Without arguments or with 'true', it enters debug mode. With 'false', it exits debug mode.",
-      "%reset": "Resets the current session.",
-      "%undo": "Remove previous messages and its response from the message history.",
-      "%save_message [path]": "Saves messages to a specified JSON path. If no path is provided, it defaults to 'messages.json'.",
-      "%load_message [path]": "Loads messages from a specified JSON path. If no path is provided, it defaults to 'messages.json'.",
+    items = {
+      "%debug [true/false]": "Toggle debug mode.",
+      "%reset": "Reset the current session.",
+      "%undo": "Remove the previous user message and response.",
+      "%save_message [path]": "Save messages to JSON.",
+      "%load_message [path]": "Load messages from JSON.",
       "%help": "Show this help message.",
     }
-
-    base_message = [
-      "> **Available Commands:**\n\n"
-    ]
-
-    for cmd, desc in commands_description.items():
-      base_message.append(f"- `{cmd}`: {desc}\n")
-
-    additional_info = [
-      "\n\nFor further assistance, please join our community Discord or consider contributing to the project's development."
-    ]
-
-    full_message = base_message + additional_info
-
-    print(Markdown("".join(full_message)))
-
+    base = ["> **Available Commands:**\n\n"]
+    for cmd, desc in items.items():
+      base.append(f"- `{cmd}`: {desc}\n")
+    print(Markdown("".join(base)))
 
   def handle_debug(self, arguments=None):
     if arguments == "" or arguments == "true":
-        print(Markdown("> Entered debug mode"))
-        print(self.messages)
-        self.debug_mode = True
+      print(Markdown("> Entered debug mode"))
+      print(self.messages)
+      self.debug_mode = True
     elif arguments == "false":
-        print(Markdown("> Exited debug mode"))
-        self.debug_mode = False
+      print(Markdown("> Exited debug mode"))
+      self.debug_mode = False
     else:
-        print(Markdown("> Unknown argument to debug command."))
+      print(Markdown("> Unknown argument to debug command."))
 
   def handle_reset(self, arguments):
     self.reset()
@@ -214,7 +146,6 @@ def handle_save_message(self, json_path):
       json_path += ".json"
     with open(json_path, 'w') as f:
       json.dump(self.messages, f, indent=2)
-
     print(Markdown(f"> messages json export to {os.path.abspath(json_path)}"))
 
   def handle_load_message(self, json_path):
@@ -224,7 +155,6 @@ def handle_load_message(self, json_path):
       json_path += ".json"
     with open(json_path, 'r') as f:
       self.load(json.load(f))
-
     print(Markdown(f"> messages json loaded from {os.path.abspath(json_path)}"))
 
   def handle_command(self, user_input):
@@ -236,521 +166,288 @@ def handle_command(self, user_input):
       "load_message": self.handle_load_message,
       "undo": self.handle_undo,
     }
-
-    user_input = user_input[1:].strip()  
+    user_input = user_input[1:].strip()
     command = user_input.split(" ")[0]
     arguments = user_input[len(command):].strip()
-    action = switch.get(command,
-                        self.default_handle)  
-    action(arguments)  
+    switch.get(command, self.default_handle)(arguments)
 
   def chat(self, message=None, return_messages=False):
+    self.verify_api_key()
+    self._auto_fix_count = 0
 
-    if not self.local:
-      self.verify_api_key()
-
-    if self.local:
-
-      if self.llama_instance == None:
-        try:
-          self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
-          if self.llama_instance == None:
-            return
-        except:
-          traceback.print_exc()
-
-          print(Markdown("".join([
-            f"> Failed to install `{self.model}`.",
-            f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/emplodeai/emplode/\n```",
-            f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**",
-            "\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Emplode community Discord and consider contributing the project's development. )",
-            "\n\nPress enter to switch to `GPT-4o` (recommended)."
-          ])))
-          input()
-
-          self.local = False
-          self.model = "gpt-4o"
-          self.verify_api_key()
-
-    welcome_message = ""
-
+    welcome = ""
     if self.debug_mode:
-      welcome_message += "> Entered debug mode"
-
-    if not self.local and not self.auto_run:
-
-      if self.use_azure:
-        notice_model = f"{self.azure_deployment_name} (Azure)"
-      else:
-        notice_model = f"{self.model.upper()}"
-      welcome_message += f"\n> Model set to `{notice_model}`\n\n**Tip:** To run locally, use `emplode --local`"
-      
-    if self.local:
-      welcome_message += f"\n> Model set to `{self.model}`"
-
+      welcome += "> Entered debug mode"
+    welcome += f"\n> Model set to `{self.model.upper()}`"
     if not self.auto_run:
-      welcome_message += "\n\n" + confirm_mode_message
-
-    welcome_message = welcome_message.strip()
-
-    if welcome_message != "":
-      if welcome_message.startswith(">"):
-        print(Markdown(welcome_message), '')
-      else:
-        print('', Markdown(welcome_message), '')
+      welcome += f"\n\n{confirm_mode_message}"
+    welcome = welcome.strip()
+    if welcome:
+      print(Markdown(welcome), '')
 
     if message:
       self.messages.append({"role": "user", "content": message})
       self.respond()
-
     else:
       while True:
         try:
           user_input = input("> ").strip()
-        except EOFError:
+        except (EOFError, KeyboardInterrupt):
+          print()
           break
-        except KeyboardInterrupt:
-          print()  
-          break
-
-        readline.add_history(user_input)
-
         if user_input.startswith("%") or user_input.startswith("/"):
           self.handle_command(user_input)
           continue
-
         self.messages.append({"role": "user", "content": user_input})
-
         try:
           self.respond()
         except KeyboardInterrupt:
           pass
         finally:
-      
           self.end_active_block()
 
     if return_messages:
-        return self.messages
+      return self.messages
 
   def verify_api_key(self):
-    if self.use_azure:
-      all_env_available = (
-        ('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and
-        'AZURE_API_BASE' in os.environ and
-        'AZURE_API_VERSION' in os.environ and
-        'AZURE_DEPLOYMENT_NAME' in os.environ)
-      if all_env_available:
-        self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY']
-        self.azure_api_base = os.environ['AZURE_API_BASE']
-        self.azure_api_version = os.environ['AZURE_API_VERSION']
-        self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME']
-        self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure')
-      else:
-        self._print_welcome_message()
-        time.sleep(1)
-
-        print(Rule(style="white"))
-
-        print(Markdown(missing_azure_info_message), '', Rule(style="white"), '')
-        response = input("Azure OpenAI API key: ")
-
-        if response == "":
-
-          print(Markdown(
-            "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."),
-                '')
-          time.sleep(2)
-          print(Rule(style="white"))
-
-          import inquirer
-
-          print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
-
-          models = {
-              '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
-              '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
-              '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
-          }
-
-          parameter_choices = list(models.keys())
-          questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
-          answers = inquirer.prompt(questions)
-          chosen_param = answers['param']
-
-          self.model = models[chosen_param]
-          self.local = True
-
-
-
-
-          return
-
-        else:
-          self.api_key = response
-          self.azure_api_base = input("Azure OpenAI API base: ")
-          self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ")
-          self.azure_api_version = input("Azure OpenAI API version: ")
-          print('', Markdown(
-            "**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."),
-                '')
-          time.sleep(2)
-          print(Rule(style="white"))
-
-      litellm.api_type = self.azure_api_type
-      litellm.api_base = self.azure_api_base
-      litellm.api_version = self.azure_api_version
-      litellm.api_key = self.api_key
-    else:
-      if self.api_key == None:
-        if 'OPENAI_API_KEY' in os.environ:
-          self.api_key = os.environ['OPENAI_API_KEY']
-        else:
-          self._print_welcome_message()
-          time.sleep(1)
-
-          print(Rule(style="white"))
-
-          print(Markdown(missing_api_key_message), '', Rule(style="white"), '')
-          response = input("OpenAI API key: ")
-
-          if response == "":
-
-              print(Markdown(
-                "> Switching to `Code-Llama`...\n\n**Tip:** Run `emplode --local` to automatically use `Code-Llama`."),
-                    '')
-              time.sleep(2)
-              print(Rule(style="white"))
-
-              import inquirer
-
-              print('', Markdown("**Emplode** will use `Code Llama` for local execution."), '')
-
-              models = {
-                  '7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
-                  '13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
-                  '34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
-              }
-
-              parameter_choices = list(models.keys())
-              questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
-              answers = inquirer.prompt(questions)
-              chosen_param = answers['param']
-              self.model = models[chosen_param]
-              self.local = True
-
-
-
-
-              return
-
-          else:
-              self.api_key = response
-              print('', Markdown("**Tip:** To save this key for later, run `setx OPENAI_API_KEY your_api_key` on Windows or `export OPENAI_API_KEY=your_api_key` on Mac/Linux."), '')
-              time.sleep(2)
-              print(Rule(style="white"))
-
-      litellm.api_key = self.api_key
-      if self.api_base:
-        litellm.api_base = self.api_base
+    if self.api_key is None:
+      key = os.environ.get('OPENAI_API_KEY')
+      if not key:
+        print(Markdown(missing_api_key_message))
+        key = input("OpenAI API key: ").strip()
+        if not key:
+          raise Exception("OpenAI API key is required to use Emplode with GPT-5.")
+      self.api_key = key
+    if self.client is None:
+      self.client = OpenAI(api_key=self.api_key)
 
   def end_active_block(self):
     if self.active_block:
       self.active_block.end()
       self.active_block = None
 
-  def respond(self):
-    info = self.get_info_for_system_message()
-
-    if self.local:
-      self.system_message = "\n".join(self.system_message.split("\n")[:2])
-      self.system_message += "\nOnly do what the user asks you to do, then ask what they'd like to do next."
-
-    system_message = self.system_message + "\n\n" + info
-
-    if self.local:
-      messages = tt.trim(self.messages, max_tokens=(self.context_window-self.max_tokens-25), system_message=system_message)
-    else:
-      messages = tt.trim(self.messages, self.model, system_message=system_message)
-
-    if self.debug_mode:
-      print("\n", "Sending `messages` to LLM:", "\n")
-      print(messages)
-      print()
-
-    if not self.local:
-      
-      error = ""
-      
-      for _ in range(3): 
-        try:
-
-            if self.use_azure:
-              response = litellm.completion(
-                  f"azure/{self.azure_deployment_name}",
-                  messages=messages,
-                  functions=[function_schema],
-                  temperature=self.temperature,
-                  stream=True,
-                  )
-            else:
-              if self.api_base:
-                response = litellm.completion(
-                  api_base=self.api_base,
-                  model = "custom/" + self.model,
-                  messages=messages,
-                  functions=[function_schema],
-                  stream=True,
-                  temperature=self.temperature,
-                )
-              else:
-                response = litellm.completion(
-                  model=self.model,
-                  messages=messages,
-                  functions=[function_schema],
-                  stream=True,
-                  temperature=self.temperature,
-                )
-
-            break
-        except:
-            if self.debug_mode:
-              traceback.print_exc()
-            error = traceback.format_exc()
-            time.sleep(3)
-      else:
-        raise Exception(error)
-            
-    elif self.local:
-
-      def messages_to_prompt(messages):
-
-
-        for message in messages:
-          if "role" not in message:
-            message["role"] = "assistant"
-
-        if "falcon" in self.model.lower():
-
-          formatted_messages = ""
-          for message in messages:
-            formatted_messages += f"{message['role'].capitalize()}: {message['content']}\n"
-          formatted_messages = formatted_messages.strip()
-
-        else:
-          
-          system_prompt = messages[0]['content']
-          formatted_messages = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"
-
-          for index, item in enumerate(messages[1:]):
-              role = item['role']
-              content = item['content']
-
-              if role == 'user':
-                  formatted_messages += f"{content} [/INST] "
-              elif role == 'function':
-                  formatted_messages += f"Output: {content} [/INST] "
-              elif role == 'assistant':
-                  formatted_messages += f"{content} </s><s>[INST] "
-
-          if formatted_messages.endswith("<s>[INST] "):
-              formatted_messages = formatted_messages[:-10]
-
-        return formatted_messages
-
-      prompt = messages_to_prompt(messages)
-      if messages[-1]["role"] != "function":
-        prompt += "Let's explore this. By the way, I can run code on your machine by writing the code in a markdown code block. This works for shell, javascript, python, R, and applescript. I'm going to try to do this for your task. Anyway, "
-      elif messages[-1]["role"] == "function" and messages[-1]["content"] != "No output":
-        prompt += "Given the output of the code I just ran, "
-      elif messages[-1]["role"] == "function" and messages[-1]["content"] == "No output":
-        prompt += "Given the fact that the code I just ran produced no output, "
-
-
-      if self.debug_mode:
-        import builtins
-        builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt)
-
-      response = self.llama_instance(
-        prompt,
-        stream=True,
-        temperature=self.temperature,
-        stop=["</s>"],
-        max_tokens=750 
-      )
-
-    self.messages.append({})
-    in_function_call = False
-    llama_function_call_finished = False
-    self.active_block = None
-
-    for chunk in response:
-      if self.use_azure and ('choices' not in chunk or len(chunk['choices']) == 0):
-        continue
-
-      if self.local:
-        if "content" not in messages[-1]:
-          chunk["choices"][0]["text"] = chunk["choices"][0]["text"].capitalize()
-          messages[-1]["role"] = "assistant"
-        delta = {"content": chunk["choices"][0]["text"]}
-      else:
-        delta = chunk["choices"][0]["delta"]
-
-      self.messages[-1] = merge_deltas(self.messages[-1], delta)
-
-      if not self.local:
-        condition = "function_call" in self.messages[-1]
-      elif self.local:
-        if "content" in self.messages[-1]:
-          condition = self.messages[-1]["content"].count("```") % 2 == 1
-        else:
-          condition = False
-
-      if condition:
-        if in_function_call == False:
-
-          self.end_active_block()
-
-          last_role = self.messages[-2]["role"]
-          if last_role == "user" or last_role == "function":
-            print()
-
-          self.active_block = CodeBlock()
-
-        in_function_call = True
-
-        if not self.local:
-          if "arguments" in self.messages[-1]["function_call"]:
-            arguments = self.messages[-1]["function_call"]["arguments"]
-            new_parsed_arguments = parse_partial_json(arguments)
-            if new_parsed_arguments:
-              self.messages[-1]["function_call"][
-                "parsed_arguments"] = new_parsed_arguments
-
-        elif self.local:
-          if "content" in self.messages[-1]:
-
-            content = self.messages[-1]["content"]
-
-            if "```" in content:
-              blocks = content.split("```")
-
-              current_code_block = blocks[-1]
-
-              lines = current_code_block.split("\n")
-
-              if content.strip() == "```": 
-                language = None
-              else:
-                if lines[0] != "":
-                  language = lines[0].strip()
-                else:
-                  language = "python"
-                  if len(lines) > 1:
-                    if lines[1].startswith("pip"):
-                      language = "shell"
-
-              code = '\n'.join(lines[1:]).strip("` \n")
-
-              arguments = {"code": code}
-              if language: 
-                if language == "bash":
-                  language = "shell"
-                arguments["language"] = language
-
-            if "function_call" not in self.messages[-1]:
-              self.messages[-1]["function_call"] = {}
-
-            self.messages[-1]["function_call"]["parsed_arguments"] = arguments
-
-      else:
-        if in_function_call == True:
-
-          if self.local:
-          
-            llama_function_call_finished = True
-
-        in_function_call = False
-
-        if self.active_block == None:
-
-          self.active_block = MessageBlock()
-
-      self.active_block.update_from_message(self.messages[-1])
-
-      if chunk["choices"][0]["finish_reason"] or llama_function_call_finished:
-        if chunk["choices"][
-            0]["finish_reason"] == "function_call" or llama_function_call_finished:
-
-          if self.debug_mode:
-            print("Running function:")
-            print(self.messages[-1])
-            print("---")
-
-          if self.auto_run == False:
-
-            self.active_block.end()
-            language = self.active_block.language
-            code = self.active_block.code
-
-            response = input("  Would you like to run this code? (y/n)\n\n  ")
-            print("")
-
-            if response.strip().lower() == "y":
-              self.active_block = CodeBlock()
-              self.active_block.language = language
-              self.active_block.code = code
-
-            else:
-              self.active_block.end()
-              self.messages.append({
-                "role":
-                "function",
-                "name":
-                "run_code",
-                "content":
-                "User decided not to run this code."
-              })
-              return
-
-          if not self.local and "parsed_arguments" not in self.messages[-1]["function_call"]:
-
-            self.messages.append({
-              "role": "function",
-              "name": "run_code",
-              "content": """Your function call could not be parsed. Please use ONLY the `run_code` function, which takes two parameters: `code` and `language`. Your response should be formatted as a JSON."""
-            })
-
-            self.respond()
+  def _extract_last_code_block(self, text):
+    pattern = re.compile(r"```([a-zA-Z]+)?\n([\s\S]*?)```", re.DOTALL)
+    matches = list(pattern.finditer(text or ""))
+    if not matches:
+      return None, None
+    lang = matches[-1].group(1) or "python"
+    code = matches[-1].group(2) or ""
+    if lang == "bash":
+      lang = "shell"
+    return lang, code.strip()
+
+  def _stream_with_responses(self, sys_and_messages):
+    content_buf = ""
+    tool_name = None
+    tool_args_buf = ""
+
+    # Live stream
+    try:
+      with self.client.responses.stream(
+        model=self.model,
+        input=sys_and_messages,
+        tools=[RUN_CODE_TOOL],
+      ) as stream:
+        for event in stream:
+          t = getattr(event, 'type', '')
+          # Text deltas
+          if 'output_text.delta' in t:
+            delta = getattr(event, 'delta', '') or getattr(event, 'text', '')
+            if delta:
+              content_buf += delta
+              if not isinstance(self.active_block, MessageBlock):
+                self.end_active_block()
+                self.active_block = MessageBlock()
+              self.active_block.update_from_message({"content": content_buf})
+          # Tool call incremental pieces
+          elif 'tool_call.delta' in t:
+            d = getattr(event, 'delta', None)
+            if isinstance(d, dict):
+              if not tool_name and d.get('name'):
+                tool_name = d['name']
+              if d.get('arguments'):
+                tool_args_buf += d['arguments']
+            elif isinstance(d, str):
+              tool_args_buf += d
+          # Tool call finished
+          elif 'tool_call.completed' in t:
+            self._execute_run_code(tool_name, tool_args_buf)
             return
+          # Response finished
+          elif t.endswith('completed') or t == 'response.completed':
+            # If model wrote a code block instead of tool call, run it
+            lang, code = self._extract_last_code_block(content_buf)
+            if lang and code:
+              self._execute_run_code('run_code', json.dumps({"language": lang, "code": code}))
+            return
+        _ = stream.get_final_response()
+    except BadRequestError:
+      # Fallback to non-stream
+      r = self.client.responses.create(
+        model=self.model,
+        input=sys_and_messages,
+        tools=[RUN_CODE_TOOL],
+        stream=False,
+      )
+      return self._handle_nonstream_response(r)
+
+  def _handle_nonstream_response(self, r):
+    # Try to read tool calls; structure can vary by SDK version
+    try:
+      out = getattr(r, 'output', None) or []
+    except Exception:
+      out = []
+    # Search for tool call
+    tool_name = None
+    tool_args = None
+    text_accum = ""
+    for item in out:
+      t = getattr(item, 'type', None)
+      if t == 'tool_call':
+        f = getattr(item, 'tool_call', None)
+        if f and getattr(f, 'type', '') == 'function':
+          tool_name = getattr(f, 'name', None)
+          tool_args = getattr(f, 'arguments', None)
+          break
+      if t == 'message' and hasattr(item, 'content'):
+        for c in getattr(item, 'content', []) or []:
+          if getattr(c, 'type', None) == 'output_text':
+            text_accum += getattr(c, 'text', '') or ''
+    if tool_name:
+      self._execute_run_code(tool_name, tool_args or "")
+      return
+    if text_accum:
+      # Try to run code fence if present
+      lang, code = self._extract_last_code_block(text_accum)
+      if lang and code:
+        self._execute_run_code('run_code', json.dumps({"language": lang, "code": code}))
+        return
+      self.end_active_block()
+      self.active_block = MessageBlock()
+      self.active_block.update_from_message({"content": text_accum})
+      self.active_block.end()
 
-          language = self.messages[-1]["function_call"]["parsed_arguments"][
-            "language"]
-          if language not in self.code_emplodes:
-            self.code_emplodes[language] = CodeEmplode(language, self.debug_mode)
-          code_emplode = self.code_emplodes[language]
-
-          code_emplode.active_block = self.active_block
-          code_emplode.run()
-
-          self.active_block.end()
+  def _is_error_output(self, output):
+    out = (output or "").lower()
+    patterns = [
+      "traceback (most recent call last)",
+      "error:",
+      "exception:",
+      "command not found",
+      "no such file or directory",
+      "module not found",
+      "moduleNotFoundError".lower(),
+      "nameerror:",
+      "syntaxerror:",
+      "typeerror:",
+      "valueerror:",
+      "runtimeerror:",
+    ]
+    return any(p in out for p in patterns)
+
+  def _run_code_direct(self, language, code):
+    self.end_active_block()
+    self.active_block = CodeBlock()
+    self.active_block.language = language
+    self.active_block.code = code
+    self.active_block.refresh()
+    if language not in self.code_emplodes:
+      self.code_emplodes[language] = CodeEmplode(language, self.debug_mode)
+    ce = self.code_emplodes[language]
+    ce.active_block = self.active_block
+    ce.run()
+    output = self.active_block.output
+    self.active_block.end()
+    return output
+
+  def _maybe_auto_install(self, language, code, output):
+    if not self.auto_install or language != 'python':
+      return False
+    text = output or ""
+    m = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", text)
+    if not m:
+      m = re.search(r"No module named ['\"]?([A-Za-z0-9_\-.]+)['\"]?", text)
+    if not m:
+      return False
+    pkg = m.group(1)
+    if pkg in self._install_attempted:
+      return False
+    self._install_attempted.add(pkg)
+
+    install_cmd = f"python -m pip install -U {pkg} || python3 -m pip install -U {pkg}"
+    self._run_code_direct('shell', install_cmd)
+    new_out = self._run_code_direct(language, code)
+    self._auto_fix_or_finish(new_out)
+    return True
+
+  def _auto_fix_or_finish(self, output):
+    if self._auto_fix_count >= self.max_auto_fixes:
+      return
+    if self._is_error_output(output):
+      self._auto_fix_count += 1
+      self.messages.append({
+        "role": "user",
+        "content": (
+          "Execution failed. Here is the full output from your last run:\n\n" +
+          (output or "No output") +
+          "\n\nPlease fix the issue and try again using the run_code tool only."
+        )
+      })
+      self.respond()
 
-          self.messages.append({
-            "role": "function",
-            "name": "run_code",
-            "content": self.active_block.output if self.active_block.output else "No output"
-          })
+  def _execute_run_code(self, tool_name, raw_args):
+    if tool_name != 'run_code':
+      return
+    parsed = parse_partial_json(raw_args or "") or {}
+    language = parsed.get('language')
+    code = parsed.get('code')
+    if not language or not code:
+      self.end_active_block()
+      self.active_block = MessageBlock()
+      self.active_block.update_from_message({"content": "Tool arguments missing 'language' or 'code'."})
+      self.active_block.end()
+      return
+    # Show code
+    self.end_active_block()
+    print()
+    self.active_block = CodeBlock()
+    self.active_block.language = language
+    self.active_block.code = code
+    self.active_block.refresh()
+    if self.auto_run is False:
+      self.active_block.end()
+      resp = input("  Would you like to run this code? (y/n)\n\n  ")
+      print("")
+      if resp.strip().lower() != 'y':
+        return
+      self.active_block = CodeBlock()
+      self.active_block.language = language
+      self.active_block.code = code
+    if language not in self.code_emplodes:
+      self.code_emplodes[language] = CodeEmplode(language, self.debug_mode)
+    ce = self.code_emplodes[language]
+    ce.active_block = self.active_block
+    ce.run()
+    output = self.active_block.output
+    self.active_block.end()
+    # Try auto-install then auto-fix
+    if self._maybe_auto_install(language, code, output):
+      return
+    self._auto_fix_or_finish(output)
 
-          self.respond()
+  def respond(self):
+    info = self.get_info_for_system_message()
+    system_message = self.system_message + "\n\n" + info
 
-        if chunk["choices"][0]["finish_reason"] != "function_call":
+    # Trim conversation to fit
+    trimmed = tt.trim(self.messages, max_tokens=(self.context_window - self.max_tokens - 25), system_message=system_message)
 
-          if self.local and "content" in self.messages[-1]:
-            self.messages[-1]["content"] = self.messages[-1]["content"].strip().rstrip("#")
-            self.active_block.update_from_message(self.messages[-1])
-            time.sleep(0.1)
+    # Convert to Responses API input
+    sys_and_messages = [{"role": "system", "content": system_message}] + trimmed[1:]
 
-          self.active_block.end()
-          return
+    # Stream first; fallback to non-stream automatically
+    self._stream_with_responses(sys_and_messages)
 
   def _print_welcome_message(self):
     print("", "", Markdown(f"\nWelcome to **Emplode**.\n"), "")
diff --git a/emplode/get_hf_llm.py b/emplode/get_hf_llm.py
deleted file mode 100644
index a93b02e..0000000
--- a/emplode/get_hf_llm.py
+++ /dev/null
@@ -1,291 +0,0 @@
-import os
-import sys
-import appdirs
-import traceback
-import inquirer
-import subprocess
-from rich import print
-from rich.markdown import Markdown
-import os
-import shutil
-from huggingface_hub import list_files_info, hf_hub_download
-
-
-def get_hf_llm(repo_id, debug_mode, context_window):
-
-    if "TheBloke/CodeLlama-" not in repo_id:
-      print('', Markdown(f"**Emplode** will use `{repo_id}` for local execution."), '')
-
-    raw_models = list_gguf_files(repo_id)
-    
-    if not raw_models:
-        print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?")
-        return None
-
-    combined_models = group_and_combine_splits(raw_models)
-
-    selected_model = None
-
-    if len(combined_models) > 3:
-
-        choices = [
-            format_quality_choice(combined_models[0], "Small"),
-            format_quality_choice(combined_models[len(combined_models) // 2], "Medium"),
-            format_quality_choice(combined_models[-1], "Large"),
-            "See More"
-        ]
-        questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
-        answers = inquirer.prompt(questions)
-        if answers["selected_model"].startswith("Small"):
-            selected_model = combined_models[0]["filename"]
-        elif answers["selected_model"].startswith("Medium"):
-            selected_model = combined_models[len(combined_models) // 2]["filename"]
-        elif answers["selected_model"].startswith("Large"):
-            selected_model = combined_models[-1]["filename"]
-    
-    if selected_model == None:
-      
-        choices = [format_quality_choice(model) for model in combined_models]
-        questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
-        answers = inquirer.prompt(questions)
-        for model in combined_models:
-            if format_quality_choice(model) == answers["selected_model"]:
-                selected_model = model["filename"]
-                break
-
-    if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
-      n_gpu_layers = -1
-    else:
-      n_gpu_layers = 0
-
-    user_data_dir = appdirs.user_data_dir("Emplode")
-    default_path = os.path.join(user_data_dir, "models")
-
-    os.makedirs(default_path, exist_ok=True)
-
-    directories_to_check = [
-        default_path,
-        "llama.cpp/models/",
-        os.path.expanduser("~") + "/llama.cpp/models/",
-        "/"
-    ]
-
-    for directory in directories_to_check:
-        path = os.path.join(directory, selected_model)
-        if os.path.exists(path):
-            model_path = path
-            break
-    else:
-        download_path = os.path.join(default_path, selected_model)
-      
-        print(f"This language model was not found on your system.\n\nDownload to `{default_path}`?", "")
-        if confirm_action(""):
-            for model_details in combined_models:
-                if model_details["filename"] == selected_model:
-                    selected_model_details = model_details
-
-                    if not enough_disk_space(selected_model_details['Size'], default_path):
-                        print(f"You do not have enough disk space available to download this model.")
-                        return None
-
-            split_files = [model["filename"] for model in raw_models if selected_model in model["filename"]]
-            
-            if len(split_files) > 1:
-                for split_file in split_files:
-                    split_path = os.path.join(default_path, split_file)
-                    if os.path.exists(split_path):
-                        if not confirm_action(f"Split file {split_path} already exists. Download again?"):
-                            continue
-                    hf_hub_download(
-                        repo_id=repo_id,
-                        filename=split_file,
-                        local_dir=default_path,
-                        local_dir_use_symlinks=False,
-                        resume_download=True)
-        
-                actually_combine_files(default_path, selected_model, split_files)
-            else:
-                hf_hub_download(
-                    repo_id=repo_id,
-                    filename=selected_model,
-                    local_dir=default_path,
-                    local_dir_use_symlinks=False,
-                    resume_download=True)
-
-            model_path = download_path
-        
-        else:
-            print('\n', "Download cancelled. Exiting.", '\n')
-            return None
-
-    print(Markdown(f"Model found at `{model_path}`"))
-  
-    try:
-        from llama_cpp import Llama
-    except:
-        if debug_mode:
-            traceback.print_exc()
-        message = "Local LLM interface package not found. Install `llama-cpp-python`?"
-        if confirm_action(message):
-    
-            import platform
-            
-            def check_command(command):
-                try:
-                    subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                    return True
-                except subprocess.CalledProcessError:
-                    return False
-                except FileNotFoundError:
-                    return False
-            
-            def install_llama(backend):
-                env_vars = {
-                    "FORCE_CMAKE": "1"
-                }
-                
-                if backend == "cuBLAS":
-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
-                elif backend == "hipBLAS":
-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on"
-                elif backend == "Metal":
-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on"
-                else: 
-                    env_vars["CMAKE_ARGS"] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
-                
-                try:
-                    subprocess.run([sys.executable, "-m", "pip", "install", "llama-cpp-python"], env={**os.environ, **env_vars}, check=True)
-                except subprocess.CalledProcessError as e:
-                    print(f"Error during installation with {backend}: {e}")
-            
-            def supports_metal():
-                if platform.system() == "Darwin":
-                    mac_version = tuple(map(int, platform.mac_ver()[0].split('.')))
-                    if mac_version >= (10, 11):
-                        return True
-                return False
-        
-            if check_command(["nvidia-smi"]):
-                install_llama("cuBLAS")
-            elif check_command(["rocminfo"]):
-                install_llama("hipBLAS")
-            elif supports_metal():
-                install_llama("Metal")
-            else:
-                install_llama("OpenBLAS")
-          
-            from llama_cpp import Llama
-            print('', Markdown("Finished downloading `Code-Llama` interface."), '')
-
-            if platform.system() == "Darwin":
-                if platform.machine() != "arm64":
-                    print("Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture.")
-                    print("The llama.ccp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac.")
-                    print("\nTo install the correct version of Python that supports 'arm64' architecture:")
-                    print("1. Download Miniforge for M1/M2:")
-                    print("wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh")
-                    print("2. Install it:")
-                    print("bash Miniforge3-MacOSX-arm64.sh")
-                    print("")
-      
-        else:
-            print('', "Installation cancelled. Exiting.", '')
-            return None
-        
-    assert os.path.isfile(model_path)
-    llama_2 = Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
-      
-    return llama_2
-
-def confirm_action(message):
-    question = [
-        inquirer.Confirm('confirm',
-                         message=message,
-                         default=True),
-    ]
-
-    answers = inquirer.prompt(question)
-    return answers['confirm']
-
-
-import os
-import inquirer
-from huggingface_hub import list_files_info, hf_hub_download, login
-from typing import Dict, List, Union
-
-def list_gguf_files(repo_id: str) -> List[Dict[str, Union[str, float]]]:
-    try:
-      files_info = list_files_info(repo_id=repo_id)
-    except Exception as e:
-      if "authentication" in str(e).lower():
-        print("You likely need to be logged in to HuggingFace to access this language model.")
-        print(f"Visit this URL to log in and apply for access to this language model: https://huggingface.co/{repo_id}")
-        print("Then, log in here:")
-        login()
-        files_info = list_files_info(repo_id=repo_id)
-  
-    gguf_files = [file for file in files_info if "gguf" in file.rfilename]
-
-    gguf_files = sorted(gguf_files, key=lambda x: x.size)
-
-    result = []
-    for file in gguf_files:
-        size_in_gb = file.size / (1024**3)
-        filename = file.rfilename
-        result.append({
-            "filename": filename,
-            "Size": size_in_gb,
-            "RAM": size_in_gb + 2.5,
-        })
-
-    return result
-
-from typing import List, Dict, Union
-
-def group_and_combine_splits(models: List[Dict[str, Union[str, float]]]) -> List[Dict[str, Union[str, float]]]:
-    grouped_files = {}
-
-    for model in models:
-        base_name = model["filename"].split('-split-')[0]
-        
-        if base_name in grouped_files:
-            grouped_files[base_name]["Size"] += model["Size"]
-            grouped_files[base_name]["RAM"] += model["RAM"]
-            grouped_files[base_name]["SPLITS"].append(model["filename"])
-        else:
-            grouped_files[base_name] = {
-                "filename": base_name,
-                "Size": model["Size"],
-                "RAM": model["RAM"],
-                "SPLITS": [model["filename"]]
-            }
-
-    return list(grouped_files.values())
-
-
-def actually_combine_files(default_path: str, base_name: str, files: List[str]) -> None:
-    files.sort()    
-    base_path = os.path.join(default_path, base_name)
-    with open(base_path, 'wb') as outfile:
-        for file in files:
-            file_path = os.path.join(default_path, file)
-            with open(file_path, 'rb') as infile:
-                outfile.write(infile.read())
-            os.remove(file_path)
-
-def format_quality_choice(model, name_override = None) -> str:
-    if name_override:
-        name = name_override
-    else:
-        name = model['filename']
-    return f"{name} | Size: {model['Size']:.1f} GB, Estimated RAM usage: {model['RAM']:.1f} GB"
-
-def enough_disk_space(size, path) -> bool:
-    _, _, free = shutil.disk_usage(path)
-
-    free_gb = free / (2**30) 
-
-    if free_gb > size:
-        return True
-
-    return False
diff --git a/pyproject.toml b/pyproject.toml
index 7203053..bf38ad3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,26 +10,13 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-openai = "^0.27.8"
-rich = "^13.4.2"
-tiktoken = "^0.4.0"
-astor = "^0.8.1"
-git-python = "^1.0.3"
-tokentrim = "^0.1.9"
-appdirs = "^1.4.4"
-six = "^1.16.0"
-python-dotenv = "^1.0.0"
-
-inquirer = "^3.1.3"
-wget = "^3.2"
-huggingface-hub = "^0.16.4"
-litellm = "^0.1.590"
-[tool.poetry.dependencies.pyreadline3]
-version = "^3.4.1"
-markers = "sys_platform == 'win32'"
+openai = "^1.106.1"
+rich = "^14.1.0"
+tiktoken = "^0.11.0"
+tokentrim = "^0.1.13"
 
 [tool.poetry.group.dev.dependencies]
-pytest = "^7.4.0"
+pytest = "*"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]