diff --git a/README.md b/README.md index 994361f..093d03b 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,10 @@ opengradient config init import os import opengradient as og -og_client = og.new_client( +client = og.init( + private_key=os.environ.get("OG_PRIVATE_KEY"), email=None, # Optional: only needed for model uploads password=None, - private_key=os.environ.get("OG_PRIVATE_KEY"), ) ``` @@ -62,7 +62,7 @@ og_client = og.new_client( #### LLM Chat ```python -completion = og_client.llm_chat( +completion = client.llm.chat( model=og.TEE_LLM.GPT_4O, messages=[{"role": "user", "content": "Hello!"}], ) @@ -73,7 +73,7 @@ print(f"Tx hash: {completion.transaction_hash}") #### Custom Model Inference Browse models on our [Model Hub](https://hub.opengradient.ai/) or upload your own: ```python -result = og_client.infer( +result = client.inference.infer( model_cid="your-model-cid", model_input={"input": [1.0, 2.0, 3.0]}, inference_mode=og.InferenceMode.VANILLA, @@ -86,7 +86,7 @@ print(f"Output: {result.model_output}") OpenGradient supports secure, verifiable inference through TEE for leading LLM providers. 
Access models from OpenAI, Anthropic, Google, and xAI with cryptographic attestation: ```python # Use TEE mode for verifiable AI execution -completion = og_client.llm_chat( +completion = client.llm.chat( model=og.TEE_LLM.CLAUDE_3_7_SONNET, messages=[{"role": "user", "content": "Your message here"}], ) @@ -112,10 +112,10 @@ The Alpha Testnet provides access to experimental features, including **workflow ```python import opengradient as og -og.init( +client = og.init( + private_key="your-private-key", email="your-email", password="your-password", - private_key="your-private-key", ) # Define input query for historical price data @@ -129,7 +129,7 @@ input_query = og.HistoricalInputQuery( ) # Deploy a workflow (optionally with scheduling) -contract_address = og.alpha.new_workflow( +contract_address = client.alpha.new_workflow( model_cid="your-model-cid", input_query=input_query, input_tensor_name="input", @@ -141,14 +141,14 @@ print(f"Workflow deployed at: {contract_address}") #### Execute and Read Results ```python # Manually trigger workflow execution -result = og.alpha.run_workflow(contract_address) +result = client.alpha.run_workflow(contract_address) print(f"Inference output: {result}") # Read the latest result -latest = og.alpha.read_workflow_result(contract_address) +latest = client.alpha.read_workflow_result(contract_address) # Get historical results -history = og.alpha.read_workflow_history(contract_address, num_results=5) +history = client.alpha.read_workflow_history(contract_address, num_results=5) ``` ### 6. 
Examples diff --git a/docs/opengradient/llm/index.md b/docs/opengradient/agents/index.md similarity index 76% rename from docs/opengradient/llm/index.md rename to docs/opengradient/agents/index.md index 3ff24b3..8ad6984 100644 --- a/docs/opengradient/llm/index.md +++ b/docs/opengradient/agents/index.md @@ -4,7 +4,7 @@ outline: [2,3] -# Package opengradient.llm +# Package opengradient.agents OpenGradient LLM Adapters @@ -19,7 +19,7 @@ into existing applications and agent frameworks. ### Langchain adapter ```python -def langchain_adapter(private_key: str, model_cid: opengradient.types.LLM, max_tokens: int = 300) ‑> opengradient.llm.og_langchain.OpenGradientChatModel +def langchain_adapter(private_key: str, model_cid: opengradient.types.LLM, max_tokens: int = 300) ‑> opengradient.agents.og_langchain.OpenGradientChatModel ``` @@ -34,7 +34,7 @@ and can be plugged into LangChain agents. ### Openai adapter ```python -def openai_adapter(private_key: str) ‑> opengradient.llm.og_openai.OpenGradientOpenAIClient +def openai_adapter(private_key: str) ‑> opengradient.agents.og_openai.OpenGradientOpenAIClient ``` diff --git a/docs/opengradient/alphasense/index.md b/docs/opengradient/alphasense/index.md index fb93d63..1db3a47 100644 --- a/docs/opengradient/alphasense/index.md +++ b/docs/opengradient/alphasense/index.md @@ -15,7 +15,7 @@ OpenGradient AlphaSense Tools ### Create read workflow tool ```python -def create_read_workflow_tool(tool_type: opengradient.alphasense.types.ToolType, workflow_contract_address: str, tool_name: str, tool_description: str, output_formatter: Callable[..., str] = >) ‑> Union[langchain_core.tools.base.BaseTool, Callable] +def create_read_workflow_tool(tool_type: opengradient.alphasense.types.ToolType, workflow_contract_address: str, tool_name: str, tool_description: str, alpha: Optional[opengradient.client.alpha.Alpha] = None, output_formatter: Callable[..., str] = >) ‑> Union[langchain_core.tools.base.BaseTool, Callable] ``` @@ -44,7 +44,7 @@ 
Callable: For ToolType.SWARM, returns a decorated function with appropriate meta ### Create run model tool ```python -def create_run_model_tool(tool_type: opengradient.alphasense.types.ToolType, model_cid: str, tool_name: str, model_input_provider: Callable[..., Dict[str, Union[str, int, float, List, numpy.ndarray]]], model_output_formatter: Callable[[opengradient.types.InferenceResult], str], tool_input_schema: Optional[Type[pydantic.main.BaseModel]] = None, tool_description: str = 'Executes the given ML model', inference_mode: opengradient.types.InferenceMode = InferenceMode.VANILLA) ‑> Union[langchain_core.tools.base.BaseTool, Callable] +def create_run_model_tool(tool_type: opengradient.alphasense.types.ToolType, model_cid: str, tool_name: str, model_input_provider: Callable[..., Dict[str, Union[str, int, float, List, numpy.ndarray]]], model_output_formatter: Callable[[opengradient.types.InferenceResult], str], inference: Optional[opengradient.client.onchain_inference.Inference] = None, tool_input_schema: Optional[Type[pydantic.main.BaseModel]] = None, tool_description: str = 'Executes the given ML model', inference_mode: opengradient.types.InferenceMode = InferenceMode.VANILLA) ‑> Union[langchain_core.tools.base.BaseTool, Callable] ``` diff --git a/docs/opengradient/alpha.md b/docs/opengradient/client/alpha.md similarity index 94% rename from docs/opengradient/alpha.md rename to docs/opengradient/client/alpha.md index dee2bca..3bb7481 100644 --- a/docs/opengradient/alpha.md +++ b/docs/opengradient/client/alpha.md @@ -4,7 +4,7 @@ outline: [2,3] -# Package opengradient.alpha +# Package opengradient.client.alpha Alpha Testnet features for OpenGradient SDK. @@ -16,7 +16,7 @@ including workflow management and ML model execution. 
### Alpha -class Alpha(client: Client) +class Alpha(blockchain: [Web3](docs/main.md#Web3), wallet_account: [local](docs/signers.md#local)) diff --git a/docs/opengradient/client/client.md b/docs/opengradient/client/client.md new file mode 100644 index 0000000..8e34daa --- /dev/null +++ b/docs/opengradient/client/client.md @@ -0,0 +1,53 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client.client + +## Classes + + +### Client + +class Client(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') + + + + +Initialize the OpenGradient client. + + +**Arguments** + +* **`private_key`**: Private key for OpenGradient transactions. +* **`email`**: Email for Model Hub authentication. Optional. +* **`password`**: Password for Model Hub authentication. Optional. +* **`rpc_url`**: RPC URL for the blockchain network. +* **`api_url`**: API URL for the OpenGradient API. +* **`contract_address`**: Inference contract address. +* **`og_llm_server_url`**: OpenGradient LLM server URL. +* **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. + + +#### Variables + + + +* static `inference : opengradient.client.onchain_inference.Inference` - The type of the None singleton. + +* static `llm : opengradient.client.llm.LLM` - The type of the None singleton. + +* static `model_hub : opengradient.client.model_hub.ModelHub` - The type of the None singleton. + + + +* `alpha` - Access Alpha Testnet features. + + Returns: + Alpha: Alpha namespace with workflow and ML model execution methods. + + Example: + client = og.Client(...) 
+ result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) \ No newline at end of file diff --git a/docs/opengradient/client/exceptions.md b/docs/opengradient/client/exceptions.md new file mode 100644 index 0000000..4a7ee8f --- /dev/null +++ b/docs/opengradient/client/exceptions.md @@ -0,0 +1,188 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client.exceptions + +## Classes + + +### AuthenticationError + +class AuthenticationError(message='Authentication failed', **kwargs) + + + + +Raised when there's an authentication error + + + + + +### FileNotFoundError + +class FileNotFoundError(file_path) + + + + +Raised when a file is not found + + + + + +### InferenceError + +class InferenceError(message, model_cid=None, **kwargs) + + + + +Raised when there's an error during inference + + + + + +### InsufficientCreditsError + +class InsufficientCreditsError(message='Insufficient credits', required_credits=None, available_credits=None, **kwargs) + + + + +Raised when the user has insufficient credits for the operation + + + + + +### InvalidInputError + +class InvalidInputError(message, invalid_fields=None, **kwargs) + + + + +Raised when invalid input is provided + + + + + +### NetworkError + +class NetworkError(message, status_code=None, response=None) + + + + +Raised when a network error occurs + + + + + +### OpenGradientError + +class OpenGradientError(message, status_code=None, response=None) + + + + +Base exception for OpenGradient SDK + + +#### Subclasses + * `AuthenticationError` + * `FileNotFoundError` + * `InferenceError` + * `InsufficientCreditsError` + * `InvalidInputError` + * `NetworkError` + * `RateLimitError` + * `ResultRetrievalError` + * `ServerError` + * `TimeoutError` + * `UnsupportedModelError` + * `UploadError` + + + +### RateLimitError + +class RateLimitError(message='Rate limit exceeded', retry_after=None, **kwargs) + + + + +Raised when API rate limit is exceeded + + + + + +### ResultRetrievalError + +class 
ResultRetrievalError(message, inference_cid=None, **kwargs) + + + + +Raised when there's an error retrieving results + + + + + +### ServerError + +class ServerError(message, status_code=None, response=None) + + + + +Raised when a server error occurs + + + + + +### TimeoutError + +class TimeoutError(message='Request timed out', timeout=None, **kwargs) + + + + +Raised when a request times out + + + + + +### UnsupportedModelError + +class UnsupportedModelError(model_type) + + + + +Raised when an unsupported model type is used + + + + + +### UploadError + +class UploadError(message, file_path=None, **kwargs) + + + + +Raised when there's an error during file upload \ No newline at end of file diff --git a/docs/opengradient/client/index.md b/docs/opengradient/client/index.md new file mode 100644 index 0000000..0deba74 --- /dev/null +++ b/docs/opengradient/client/index.md @@ -0,0 +1,63 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client + +## Submodules + +* [alpha](docs/client.md#alpha): Alpha Testnet features for OpenGradient SDK. +* [client](docs/client.md#client): +* [exceptions](docs/client.md#exceptions): +* [llm](docs/client.md#llm): +* [model_hub](docs/client.md#model_hub): +* [onchain_inference](docs/client.md#onchain_inference): +* [x402_auth](docs/client.md#x402_auth): X402 Authentication handler for httpx streaming requests. + +## Classes + + +### Client + +class Client(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') + + + + +Initialize the OpenGradient client. + + +**Arguments** + +* **`private_key`**: Private key for OpenGradient transactions. 
+* **`email`**: Email for Model Hub authentication. Optional. +* **`password`**: Password for Model Hub authentication. Optional. +* **`rpc_url`**: RPC URL for the blockchain network. +* **`api_url`**: API URL for the OpenGradient API. +* **`contract_address`**: Inference contract address. +* **`og_llm_server_url`**: OpenGradient LLM server URL. +* **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. + + +#### Variables + + + +* static `inference : opengradient.client.onchain_inference.Inference` - The type of the None singleton. + +* static `llm : opengradient.client.llm.LLM` - The type of the None singleton. + +* static `model_hub : opengradient.client.model_hub.ModelHub` - The type of the None singleton. + + + +* `alpha` - Access Alpha Testnet features. + + Returns: + Alpha: Alpha namespace with workflow and ML model execution methods. + + Example: + client = og.Client(...) + result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) \ No newline at end of file diff --git a/docs/opengradient/client/llm.md b/docs/opengradient/client/llm.md new file mode 100644 index 0000000..65a7ad7 --- /dev/null +++ b/docs/opengradient/client/llm.md @@ -0,0 +1,70 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client.llm + +## Classes + + +### LLM + +class LLM(wallet_account: [local](docs/signers.md#local), og_llm_server_url: str, og_llm_streaming_server_url: str) + + + + + + + + + +### Chat + +```python +def chat(self, model: opengradient.types.TEE_LLM, messages: List[Dict], max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, tools: Optional[List[Dict]] = [], tool_choice: Optional[str] = None, x402_settlement_mode: Optional[opengradient.types.x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, stream: bool = False) ‑> Union[opengradient.types.TextGenerationOutput, opengradient.types.TextGenerationStream] +``` + + + + +Perform inference on an LLM model using chat via TEE. 
+ + +**Returns** + +Union[TextGenerationOutput, TextGenerationStream]: + - If stream=False: TextGenerationOutput with chat_output, transaction_hash, finish_reason, and payment_hash + - If stream=True: TextGenerationStream yielding StreamChunk objects with typed deltas (true streaming via threading) + +**Raises** + +* **`OpenGradientError`**: If the inference fails. + + + + +### Completion + +```python +def completion(self, model: opengradient.types.TEE_LLM, prompt: str, max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, x402_settlement_mode: Optional[opengradient.types.x402SettlementMode] = x402SettlementMode.SETTLE_BATCH) ‑> opengradient.types.TextGenerationOutput +``` + + + + +Perform inference on an LLM model using completions via TEE. + + +**Returns** + +TextGenerationOutput: Generated text results including: + - Transaction hash ("external" for TEE providers) + - String of completion output + - Payment hash for x402 transactions + +**Raises** + +* **`OpenGradientError`**: If the inference fails. \ No newline at end of file diff --git a/docs/opengradient/client/model_hub.md b/docs/opengradient/client/model_hub.md new file mode 100644 index 0000000..e7efc3c --- /dev/null +++ b/docs/opengradient/client/model_hub.md @@ -0,0 +1,111 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client.model_hub + +## Classes + + +### ModelHub + +class ModelHub(hub_user: Optional[Dict] = None) + + + + + + + + + +### Create model + +```python +def create_model(self, model_name: str, model_desc: str, version: str = '1.00') ‑> opengradient.types.ModelRepository +``` + + + + +Create a new model with the given model_name and model_desc, and a specified version. + + +**Returns** + +dict: The server response containing model details. + +**Raises** + +* **`CreateModelError`**: If the model creation fails. 
+ + + + +### Create version + +```python +def create_version(self, model_name: str, notes: str = '', is_major: bool = False) ‑> dict +``` + + + + +Create a new version for the specified model. + + +**Returns** + +dict: The server response containing version details. + +**Raises** + +* **`Exception`**: If the version creation fails. + + + + +### List files + +```python +def list_files(self, model_name: str, version: str) ‑> List[Dict] +``` + + + + +List files for a specific version of a model. + + +**Returns** + +List[Dict]: A list of dictionaries containing file information. + +**Raises** + +* **`OpenGradientError`**: If the file listing fails. + + + + +### Upload + +```python +def upload(self, model_path: str, model_name: str, version: str) ‑> opengradient.types.FileUploadResult +``` + + + + +Upload a model file to the server. + + +**Returns** + +dict: The processed result. + +**Raises** + +* **`OpenGradientError`**: If the upload fails. \ No newline at end of file diff --git a/docs/opengradient/client/onchain_inference.md b/docs/opengradient/client/onchain_inference.md new file mode 100644 index 0000000..7bdd739 --- /dev/null +++ b/docs/opengradient/client/onchain_inference.md @@ -0,0 +1,53 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.client.onchain_inference + +## Classes + + +### Inference + +class Inference(blockchain: [Web3](docs/main.md#Web3), wallet_account: [local](docs/signers.md#local), inference_hub_contract_address: str, api_url: str) + + + + + + + + + +### Infer + +```python +def infer(self, model_cid: str, inference_mode: opengradient.types.InferenceMode, model_input: Dict[str, Union[str, int, float, List, numpy.ndarray]], max_retries: Optional[int] = None) ‑> opengradient.types.InferenceResult +``` + + + + +Perform inference on a model. + + +**Returns** + +InferenceResult (InferenceResult): A dataclass object containing the transaction hash and model output. 
+ transaction_hash (str): Blockchain hash for the transaction + model_output (Dict[str, np.ndarray]): Output of the ONNX model + +**Raises** + +* **`OpenGradientError`**: If the inference fails. + + +#### Variables + + + +* `inference_abi : dict` + +* `precompile_abi : dict` \ No newline at end of file diff --git a/docs/opengradient/x402_auth.md b/docs/opengradient/client/x402_auth.md similarity index 97% rename from docs/opengradient/x402_auth.md rename to docs/opengradient/client/x402_auth.md index 3325623..12f8a3c 100644 --- a/docs/opengradient/x402_auth.md +++ b/docs/opengradient/client/x402_auth.md @@ -4,7 +4,7 @@ outline: [2,3] -# Package opengradient.x402_auth +# Package opengradient.client.x402_auth X402 Authentication handler for httpx streaming requests. diff --git a/docs/opengradient/index.md b/docs/opengradient/index.md index 97f4043..1db4966 100644 --- a/docs/opengradient/index.md +++ b/docs/opengradient/index.md @@ -10,258 +10,45 @@ OpenGradient Python SDK for interacting with AI models and infrastructure. ## Submodules -* [**alpha**](./alpha): Alpha Testnet features for OpenGradient SDK. +* [**agents**](./agents): OpenGradient LLM Adapters * [**alphasense**](./alphasense): OpenGradient AlphaSense Tools -* [**llm**](./llm): OpenGradient LLM Adapters +* [**client**](./client): +* [**types**](./types): * [**workflow_models**](./workflow_models): OpenGradient Hardcoded Models -* [**x402_auth**](./x402_auth): X402 Authentication handler for httpx streaming requests. ## Functions -### Create model - -```python -def create_model(model_name: str, model_desc: str, model_path: Optional[str] = None) ‑> opengradient.types.ModelRepository -``` - - - - -Create a new model repository. 
- - -**Arguments** - -* **`model_name`**: Name for the new model repository -* **`model_desc`**: Description of the model -* **`model_path`**: Optional path to model file to upload immediately - - -**Returns** - -ModelRepository: Creation response with model metadata and optional upload results - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - -### Create version - -```python -def create_version(model_name, notes=None, is_major=False) -``` - - - - -Create a new version for an existing model. - - -**Arguments** - -* **`model_name`**: Name of the model repository -* **`notes`**: Optional release notes for this version -* **`is_major`**: If True, creates a major version bump instead of minor - - -**Returns** - -dict: Version creation response with version metadata - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - -### Infer - -```python -def infer(model_cid, inference_mode, model_input, max_retries: Optional[int] = None) ‑> opengradient.types.InferenceResult -``` - - - - -Run inference on a model. - - -**Arguments** - -* **`model_cid`**: CID of the model to use -* **`inference_mode`**: Mode of inference (e.g. VANILLA) -* **`model_input`**: Input data for the model -* **`max_retries`**: Maximum number of retries for failed transactions - - -**Returns** - -InferenceResult (InferenceResult): A dataclass object containing the transaction hash and model output. - * transaction_hash (str): Blockchain hash for the transaction - * model_output (Dict[str, np.ndarray]): Output of the ONNX model - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - ### Init ```python -def init(email: str, password: str, private_key: str, rpc_url='https://ogevmdevnet.opengradient.ai', api_url='https://sdk-devnet.opengradient.ai', contract_address='0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE') -``` - - - - -Initialize the OpenGradient SDK with authentication and network settings. 
- - -**Arguments** - -* **`email`**: User's email address for authentication -* **`password`**: User's password for authentication -* **`private_key`**: Ethereum private key for blockchain transactions -* **`rpc_url`**: Optional RPC URL for the blockchain network, defaults to testnet -* **`api_url`**: Optional API URL for the OpenGradient API, defaults to testnet -* **`contract_address`**: Optional inference contract address - - - - -### List files - -```python -def list_files(model_name: str, version: str) ‑> List[Dict] -``` - - - - -List files in a model repository version. - - -**Arguments** - -* **`model_name`**: Name of the model repository -* **`version`**: Version string to list files from - - -**Returns** - -List[Dict]: List of file metadata dictionaries - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - -### Llm chat - -```python -def llm_chat(model_cid: opengradient.types.LLM, messages: List[Dict], max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, x402_settlement_mode: Optional[opengradient.types.x402SettlementMode] = settle-batch, stream: Optional[bool] = False) ‑> Union[opengradient.types.TextGenerationOutput, opengradient.types.TextGenerationStream] -``` - - - - -Have a chat conversation with an LLM via TEE. 
- - -**Arguments** - -* **`model_cid`**: CID of the LLM model to use (e.g., 'anthropic/claude-3.5-haiku') -* **`messages`**: List of chat messages, each with 'role' and 'content' -* **`max_tokens`**: Maximum tokens to generate -* **`stop_sequence`**: Optional list of sequences where generation should stop -* **`temperature`**: Sampling temperature (0.0 = deterministic, 1.0 = creative) -* **`tools`**: Optional list of tools the model can use -* **`tool_choice`**: Optional specific tool to use -* **`x402_settlement_mode`**: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode) -* **`stream`**: Optional boolean to enable streaming - - -**Returns** - -TextGenerationOutput or TextGenerationStream - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - -### Llm completion - -```python -def llm_completion(model_cid: opengradient.types.LLM, prompt: str, max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, x402_settlement_mode: Optional[opengradient.types.x402SettlementMode] = settle-batch) ‑> opengradient.types.TextGenerationOutput -``` - - - - -Generate text completion using an LLM via TEE. 
- - -**Arguments** - -* **`model_cid`**: CID of the LLM model to use (e.g., 'anthropic/claude-3.5-haiku') -* **`prompt`**: Text prompt for completion -* **`max_tokens`**: Maximum tokens to generate -* **`stop_sequence`**: Optional list of sequences where generation should stop -* **`temperature`**: Sampling temperature (0.0 = deterministic, 1.0 = creative) -* **`x402_settlement_mode`**: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode) - - -**Returns** - -TextGenerationOutput: Transaction hash and generated text - -**Raises** - -* **`RuntimeError`**: If SDK is not initialized - - - - -### Upload - -```python -def upload(model_path, model_name, version) ‑> opengradient.types.FileUploadResult +def init(private_key: str, email: Optional[str] = None, password: Optional[str] = None, **kwargs) ‑> opengradient.client.client.Client ``` -Upload a model file to OpenGradient. +Initialize the global OpenGradient client. **Arguments** -* **`model_path`**: Path to the model file on local filesystem -* **`model_name`**: Name of the model repository -* **`version`**: Version string for this model upload +* **`private_key`**: Private key for OpenGradient transactions. +* **`email`**: Email for Model Hub authentication. Optional. +* **`password`**: Password for Model Hub authentication. Optional. +* **`**kwargs`**: Additional arguments forwarded to :class:`Client`. **Returns** -FileUploadResult: Upload response containing file metadata +The newly created :class:`Client` instance. -**Raises** +## Global variables -* **`RuntimeError`**: If SDK is not initialized + +* `global_client : Optional[opengradient.client.client.Client]` - Global client instance. Set by calling :func:`init`. 
## Classes @@ -314,9 +101,55 @@ Enum where members are also (and must be) ints +### Client + +class Client(private_key: str, email: Optional[str] = None, password: Optional[str] = None, rpc_url: str = 'https://ogevmdevnet.opengradient.ai', api_url: str = 'https://sdk-devnet.opengradient.ai', contract_address: str = '0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE', og_llm_server_url: Optional[str] = 'https://llmogevm.opengradient.ai', og_llm_streaming_server_url: Optional[str] = 'https://llmogevm.opengradient.ai') + + + + +Initialize the OpenGradient client. + + +**Arguments** + +* **`private_key`**: Private key for OpenGradient transactions. +* **`email`**: Email for Model Hub authentication. Optional. +* **`password`**: Password for Model Hub authentication. Optional. +* **`rpc_url`**: RPC URL for the blockchain network. +* **`api_url`**: API URL for the OpenGradient API. +* **`contract_address`**: Inference contract address. +* **`og_llm_server_url`**: OpenGradient LLM server URL. +* **`og_llm_streaming_server_url`**: OpenGradient LLM streaming server URL. + + +#### Variables + + + +* static `inference : opengradient.client.onchain_inference.Inference` - The type of the None singleton. + +* static `llm : opengradient.client.llm.LLM` - The type of the None singleton. + +* static `model_hub : opengradient.client.model_hub.ModelHub` - The type of the None singleton. + + + +* `alpha` - Access Alpha Testnet features. + + Returns: + Alpha: Alpha namespace with workflow and ML model execution methods. + + Example: + client = og.Client(...) 
+ result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) + + + + ### HistoricalInputQuery -class HistoricalInputQuery(base: str, quote: str, total_candles: int, candle_duration_in_mins: int, order: [CandleOrder](docs/types.md#CandleOrder), candle_types: List[[CandleType](docs/types.md#CandleType)]) +class HistoricalInputQuery(base: str, quote: str, total_candles: int, candle_duration_in_mins: int, order: `CandleOrder`, candle_types: List[`CandleType`]) diff --git a/docs/opengradient/types.md b/docs/opengradient/types.md new file mode 100644 index 0000000..580f866 --- /dev/null +++ b/docs/opengradient/types.md @@ -0,0 +1,774 @@ +--- +outline: [2,3] +--- + + + +# Package opengradient.types + +## Classes + + +### Abi + +class Abi(functions: List[`AbiFunction`]) + + + + +Abi(functions: List[opengradient.types.AbiFunction]) + + + + +### From json + +```python +def from_json(abi_json) +``` + + + + + + + +#### Variables + + + +* static `functions : List[opengradient.types.AbiFunction]` - The type of the None singleton. + + + + +### AbiFunction + +class AbiFunction(name: str, inputs: List[Union[str, ForwardRef('`AbiFunction`')]], outputs: List[Union[str, ForwardRef('`AbiFunction`')]], state_mutability: str) + + + + +AbiFunction(name: str, inputs: List[Union[str, ForwardRef('AbiFunction')]], outputs: List[Union[str, ForwardRef('AbiFunction')]], state_mutability: str) + + +#### Variables + + + +* static `inputs : List[Union[str, opengradient.types.AbiFunction]]` - The type of the None singleton. + +* static `name : str` - The type of the None singleton. + +* static `outputs : List[Union[str, opengradient.types.AbiFunction]]` - The type of the None singleton. + +* static `state_mutability : str` - The type of the None singleton. + + + + +### CandleOrder + +class CandleOrder(*args, **kwds) + + + + +Enum where members are also (and must be) ints + + +#### Variables + + + +* static `ASCENDING` - The type of the None singleton. 
+ +* static `DESCENDING` - The type of the None singleton. + + + + +### CandleType + +class CandleType(*args, **kwds) + + + + +Enum where members are also (and must be) ints + + +#### Variables + + + +* static `CLOSE` - The type of the None singleton. + +* static `HIGH` - The type of the None singleton. + +* static `LOW` - The type of the None singleton. + +* static `OPEN` - The type of the None singleton. + +* static `VOLUME` - The type of the None singleton. + + + + +### FileUploadResult + +class FileUploadResult(modelCid: str, size: int) + + + + +FileUploadResult(modelCid: str, size: int) + + +#### Variables + + + +* static `modelCid : str` - The type of the None singleton. + +* static `size : int` - The type of the None singleton. + + + + +### HistoricalInputQuery + +class HistoricalInputQuery(base: str, quote: str, total_candles: int, candle_duration_in_mins: int, order: `CandleOrder`, candle_types: List[`CandleType`]) + + + + +HistoricalInputQuery(base: str, quote: str, total_candles: int, candle_duration_in_mins: int, order: opengradient.types.CandleOrder, candle_types: List[opengradient.types.CandleType]) + + + + +### To abi format + +```python +def to_abi_format(self) ‑> tuple +``` + + + + +Convert to format expected by contract ABI + + +#### Variables + + + +* static `base : str` - The type of the None singleton. + +* static `candle_duration_in_mins : int` - The type of the None singleton. + +* static `candle_types : List[opengradient.types.CandleType]` - The type of the None singleton. + +* static `order : opengradient.types.CandleOrder` - The type of the None singleton. + +* static `quote : str` - The type of the None singleton. + +* static `total_candles : int` - The type of the None singleton. + + + + +### InferenceMode + +class InferenceMode(*args, **kwds) + + + + +Enum for the different inference modes available for inference (VANILLA, ZKML, TEE) + + +#### Variables + + + +* static `TEE` - The type of the None singleton. 
+ +* static `VANILLA` - The type of the None singleton. + +* static `ZKML` - The type of the None singleton. + + + + +### InferenceResult + +class InferenceResult(transaction_hash: str, model_output: Dict[str, [**ndarray**](./ndarray)]) + + + + +Output for ML inference requests. +This class has two fields + transaction_hash (str): Blockchain hash for the transaction + model_output (Dict[str, np.ndarray]): Output of the ONNX model + + +#### Variables + + + +* static `model_output : Dict[str, numpy.ndarray]` - The type of the None singleton. + +* static `transaction_hash : str` - The type of the None singleton. + + + + +### LLM + +class LLM(*args, **kwds) + + + + +Enum for available LLM models in OpenGradient. + +These models can be used with llm_chat() and llm_completion() methods. +You can use either the enum value or the string identifier directly. + + +**Note** + +TEE_LLM enum contains the same models but is specifically for +Trusted Execution Environment (TEE) verified inference. + +#### Variables + + + +* static `CLAUDE_3_5_HAIKU` - The type of the None singleton. + +* static `CLAUDE_3_7_SONNET` - The type of the None singleton. + +* static `CLAUDE_4_0_SONNET` - The type of the None singleton. + +* static `GEMINI_2_0_FLASH` - The type of the None singleton. + +* static `GEMINI_2_5_FLASH` - The type of the None singleton. + +* static `GEMINI_2_5_FLASH_LITE` - The type of the None singleton. + +* static `GEMINI_2_5_PRO` - The type of the None singleton. + +* static `GPT_4O` - The type of the None singleton. + +* static `GPT_4_1_2025_04_14` - The type of the None singleton. + +* static `GROK_2_1212` - The type of the None singleton. + +* static `GROK_2_VISION_LATEST` - The type of the None singleton. + +* static `GROK_3_BETA` - The type of the None singleton. + +* static `GROK_3_MINI_BETA` - The type of the None singleton. + +* static `GROK_4_1_FAST` - The type of the None singleton. + +* static `GROK_4_1_FAST_NON_REASONING` - The type of the None singleton. 
+ +* static `O4_MINI` - The type of the None singleton. + + + + +### ModelInput + +class ModelInput(numbers: List[`NumberTensor`], strings: List[`StringTensor`]) + + + + +A collection of tensor inputs required for ONNX model inference. + + +**Attributes** + +* **`numbers`**: Collection of numeric tensors for the model. +* **`strings`**: Collection of string tensors for the model. + + +#### Variables + + + +* static `numbers : List[opengradient.types.NumberTensor]` - The type of the None singleton. + +* static `strings : List[opengradient.types.StringTensor]` - The type of the None singleton. + + + + +### ModelOutput + +class ModelOutput(numbers: Dict[str, [**ndarray**](./ndarray)], strings: Dict[str, [**ndarray**](./ndarray)], jsons: Dict[str, [**ndarray**](./ndarray)], is_simulation_result: bool) + + + + +Model output struct based on translations from smart contract. + + +#### Variables + + + +* static `is_simulation_result : bool` - The type of the None singleton. + +* static `jsons : Dict[str, numpy.ndarray]` - The type of the None singleton. + +* static `numbers : Dict[str, numpy.ndarray]` - The type of the None singleton. + +* static `strings : Dict[str, numpy.ndarray]` - The type of the None singleton. + + + + +### ModelRepository + +class ModelRepository(name: str, initialVersion: str) + + + + +ModelRepository(name: str, initialVersion: str) + + +#### Variables + + + +* static `initialVersion : str` - The type of the None singleton. + +* static `name : str` - The type of the None singleton. + + + + +### Number + +class Number(value: int, decimals: int) + + + + +Number(value: int, decimals: int) + + +#### Variables + + + +* static `decimals : int` - The type of the None singleton. + +* static `value : int` - The type of the None singleton. + + + + +### NumberTensor + +class NumberTensor(name: str, values: List[Tuple[int, int]]) + + + + +A container for numeric tensor data used as input for ONNX models. 
+ + +**Attributes** + +* **`name`**: Identifier for this tensor in the model. +* **`values`**: List of integer tuples representing the tensor data. + + +#### Variables + + + +* static `name : str` - The type of the None singleton. + +* static `values : List[Tuple[int, int]]` - The type of the None singleton. + + + + +### SchedulerParams + +class SchedulerParams(frequency: int, duration_hours: int) + + + + +SchedulerParams(frequency: int, duration_hours: int) + + + + +### From dict + +```python +def from_dict(data: Optional[Dict[str, int]]) ‑> Optional[opengradient.types.SchedulerParams] +``` + + + + + + + +#### Variables + + + +* static `duration_hours : int` - The type of the None singleton. + +* static `frequency : int` - The type of the None singleton. + + + +* `end_time : int` + + + + +### StreamChoice + +class StreamChoice(delta: `StreamDelta`, index: int = 0, finish_reason: Optional[str] = None) + + + + +Represents a choice in a streaming response. + + +**Attributes** + +* **`delta`**: The incremental changes in this chunk +* **`index`**: Choice index (usually 0) +* **`finish_reason`**: Reason for completion (appears in final chunk) + + +#### Variables + + + +* static `delta : opengradient.types.StreamDelta` - The type of the None singleton. + +* static `finish_reason : Optional[str]` - The type of the None singleton. + +* static `index : int` - The type of the None singleton. + + + + +### StreamChunk + +class StreamChunk(choices: List[`StreamChoice`], model: str, usage: Optional[`StreamUsage`] = None, is_final: bool = False) + + + + +Represents a single chunk in a streaming LLM response. + +This follows the OpenAI streaming format but is provider-agnostic. +Each chunk contains incremental data, with the final chunk including +usage information. 
+ + +**Attributes** + +* **`choices`**: List of streaming choices (usually contains one choice) +* **`model`**: Model identifier +* **`usage`**: Token usage information (only in final chunk) +* **`is_final`**: Whether this is the final chunk (before [DONE]) + + + + +### From sse data + +```python +def from_sse_data(data: Dict) ‑> opengradient.types.StreamChunk +``` + + + + +Parse a StreamChunk from SSE data dictionary. + + +**Arguments** + +* **`data`**: Dictionary parsed from SSE data line + + +**Returns** + +StreamChunk instance + +#### Variables + + + +* static `choices : List[opengradient.types.StreamChoice]` - The type of the None singleton. + +* static `is_final : bool` - The type of the None singleton. + +* static `model : str` - The type of the None singleton. + +* static `usage : Optional[opengradient.types.StreamUsage]` - The type of the None singleton. + + + + +### StreamDelta + +class StreamDelta(content: Optional[str] = None, role: Optional[str] = None, tool_calls: Optional[List[Dict]] = None) + + + + +Represents a delta (incremental change) in a streaming response. + + +**Attributes** + +* **`content`**: Incremental text content (if any) +* **`role`**: Message role (appears in first chunk) +* **`tool_calls`**: Tool call information (if function calling is used) + + +#### Variables + + + +* static `content : Optional[str]` - The type of the None singleton. + +* static `role : Optional[str]` - The type of the None singleton. + +* static `tool_calls : Optional[List[Dict]]` - The type of the None singleton. + + + + +### StreamUsage + +class StreamUsage(prompt_tokens: int, completion_tokens: int, total_tokens: int) + + + + +Token usage information for a streaming response. + + +**Attributes** + +* **`prompt_tokens`**: Number of tokens in the prompt +* **`completion_tokens`**: Number of tokens in the completion +* **`total_tokens`**: Total tokens used + + +#### Variables + + + +* static `completion_tokens : int` - The type of the None singleton. 
+ +* static `prompt_tokens : int` - The type of the None singleton. + +* static `total_tokens : int` - The type of the None singleton. + + + + +### StringTensor + +class StringTensor(name: str, values: List[str]) + + + + +A container for string tensor data used as input for ONNX models. + + +**Attributes** + +* **`name`**: Identifier for this tensor in the model. +* **`values`**: List of strings representing the tensor data. + + +#### Variables + + + +* static `name : str` - The type of the None singleton. + +* static `values : List[str]` - The type of the None singleton. + + + + +### TEE_LLM + +class TEE_LLM(*args, **kwds) + + + + +Enum for LLM models available for TEE (Trusted Execution Environment) execution. + +TEE mode provides cryptographic verification that inference was performed +correctly in a secure enclave. Use this for applications requiring +auditability and tamper-proof AI inference. + + +**Note** + +The models in TEE_LLM are the same as LLM, but this enum explicitly +indicates support for TEE execution. + +#### Variables + + + +* static `CLAUDE_3_5_HAIKU` - The type of the None singleton. + +* static `CLAUDE_3_7_SONNET` - The type of the None singleton. + +* static `CLAUDE_4_0_SONNET` - The type of the None singleton. + +* static `GEMINI_2_0_FLASH` - The type of the None singleton. + +* static `GEMINI_2_5_FLASH` - The type of the None singleton. + +* static `GEMINI_2_5_FLASH_LITE` - The type of the None singleton. + +* static `GEMINI_2_5_PRO` - The type of the None singleton. + +* static `GPT_4O` - The type of the None singleton. + +* static `GPT_4_1_2025_04_14` - The type of the None singleton. + +* static `GROK_2_1212` - The type of the None singleton. + +* static `GROK_2_VISION_LATEST` - The type of the None singleton. + +* static `GROK_3_BETA` - The type of the None singleton. + +* static `GROK_3_MINI_BETA` - The type of the None singleton. + +* static `GROK_4_1_FAST` - The type of the None singleton. 
+ +* static `GROK_4_1_FAST_NON_REASONING` - The type of the None singleton. + +* static `O4_MINI` - The type of the None singleton. + + + + +### TextGenerationOutput + +class TextGenerationOutput(transaction_hash: str, finish_reason: Optional[str] = None, chat_output: Optional[Dict] = None, completion_output: Optional[str] = None, payment_hash: Optional[str] = None) + + + + +Output structure for text generation requests. + + +#### Variables + + + +* static `chat_output : Optional[Dict]` - Dictionary of chat response containing role, message content, tool call parameters, etc.. Empty dict if not applicable. + +* static `completion_output : Optional[str]` - Raw text output from completion-style generation. Empty string if not applicable. + +* static `finish_reason : Optional[str]` - Reason for completion (e.g., 'tool_call', 'stop', 'error'). Empty string if not applicable. + +* static `payment_hash : Optional[str]` - Payment hash for x402 transaction + +* static `transaction_hash : str` - Blockchain hash for the transaction. + + + + +### TextGenerationStream + +class TextGenerationStream(_iterator: Union[Iterator[str], AsyncIterator[str]]) + + + + +Iterator wrapper for streaming text generation responses. + +Provides a clean interface for iterating over stream chunks with +automatic parsing of SSE format. + + + + + +### x402SettlementMode + +class x402SettlementMode(*args, **kwds) + + + + +Settlement modes for x402 payment protocol transactions. + +These modes control how inference data is recorded on-chain for payment settlement +and auditability. Each mode offers different trade-offs between data completeness, +privacy, and transaction costs. + + +**Attributes** + +* **`SETTLE`**: Individual settlement with input/output hashes only. + Also known as SETTLE_INDIVIDUAL in some documentation. + Records cryptographic hashes of the inference input and output. + Most privacy-preserving option - actual data is not stored on-chain. 
+ Suitable for applications where only proof of execution is needed. + CLI usage: --settlement-mode settle +* **`SETTLE_METADATA`**: Individual settlement with full metadata. + Also known as SETTLE_INDIVIDUAL_WITH_METADATA in some documentation. + Records complete model information, full input and output data, + and all inference metadata on-chain. + Provides maximum transparency and auditability. + Higher gas costs due to larger data storage. + CLI usage: --settlement-mode settle-metadata +* **`SETTLE_BATCH`**: Batch settlement for multiple inferences. + Aggregates multiple inference requests into a single settlement transaction + using batch hashes. + Most cost-efficient for high-volume applications. + Reduced per-inference transaction overhead. + CLI usage: --settlement-mode settle-batch + + + +#### Variables + + + +* static `SETTLE` - The type of the None singleton. + +* static `SETTLE_BATCH` - The type of the None singleton. + +* static `SETTLE_INDIVIDUAL` - The type of the None singleton. + +* static `SETTLE_INDIVIDUAL_WITH_METADATA` - The type of the None singleton. + +* static `SETTLE_METADATA` - The type of the None singleton. \ No newline at end of file diff --git a/docs/opengradient/workflow_models/utils.md b/docs/opengradient/workflow_models/utils.md index 5e1b539..2c7fc52 100644 --- a/docs/opengradient/workflow_models/utils.md +++ b/docs/opengradient/workflow_models/utils.md @@ -43,7 +43,7 @@ Create block explorer link for transaction. 
### Read workflow wrapper ```python -def read_workflow_wrapper(contract_address: str, format_function: Callable[..., str]) ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_workflow_wrapper(alpha: opengradient.client.alpha.Alpha, contract_address: str, format_function: Callable[..., str]) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` diff --git a/docs/opengradient/workflow_models/workflow_models.md b/docs/opengradient/workflow_models/workflow_models.md index 2644acc..cdd17d4 100644 --- a/docs/opengradient/workflow_models/workflow_models.md +++ b/docs/opengradient/workflow_models/workflow_models.md @@ -15,7 +15,7 @@ Repository of OpenGradient quantitative workflow models. ### Read btc 1 hour price forecast ```python -def read_btc_1_hour_price_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_btc_1_hour_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -31,7 +31,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read eth 1 hour price forecast ```python -def read_eth_1_hour_price_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_eth_1_hour_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -47,7 +47,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read eth usdt one hour volatility forecast ```python -def read_eth_usdt_one_hour_volatility_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_eth_usdt_one_hour_volatility_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -63,7 +63,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read sol 1 hour price forecast ```python -def read_sol_1_hour_price_forecast() ‑> 
opengradient.workflow_models.types.WorkflowModelOutput +def read_sol_1_hour_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -79,7 +79,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read sui 1 hour price forecast ```python -def read_sui_1_hour_price_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_sui_1_hour_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -95,7 +95,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read sui usdt 30 min price forecast ```python -def read_sui_usdt_30_min_price_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_sui_usdt_30_min_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` @@ -111,7 +111,7 @@ More information on this model can be found at https://hub.opengradient.ai/model ### Read sui usdt 6 hour price forecast ```python -def read_sui_usdt_6_hour_price_forecast() ‑> opengradient.workflow_models.types.WorkflowModelOutput +def read_sui_usdt_6_hour_price_forecast(alpha: opengradient.client.alpha.Alpha) ‑> opengradient.workflow_models.types.WorkflowModelOutput ``` diff --git a/examples/README.md b/examples/README.md index a5fe1de..4639b26 100644 --- a/examples/README.md +++ b/examples/README.md @@ -163,10 +163,10 @@ All examples use a similar pattern to initialize the OpenGradient client: import os import opengradient as og -og_client = og.new_client( +og_client = og.Client( + private_key=os.environ.get("OG_PRIVATE_KEY"), email=os.environ.get("OG_MODEL_HUB_EMAIL"), password=os.environ.get("OG_MODEL_HUB_PASSWORD"), - private_key=os.environ.get("OG_PRIVATE_KEY") ) ``` @@ -175,7 +175,7 @@ og_client = og.new_client( Basic inference pattern: ```python -result = og_client.infer( +result = 
og_client.inference.infer( model_cid="your-model-cid", model_input={"input_key": "input_value"}, inference_mode=og.InferenceMode.VANILLA @@ -189,7 +189,7 @@ print(f"Tx hash: {result.transaction_hash}") LLM chat pattern: ```python -completion = og_client.llm_chat( +completion = og_client.llm.chat( model=og.TEE_LLM.CLAUDE_3_5_HAIKU, messages=[{"role": "user", "content": "Your message"}], ) diff --git a/examples/alpha/create_workflow.py b/examples/alpha/create_workflow.py index a9cab33..ed6bda0 100644 --- a/examples/alpha/create_workflow.py +++ b/examples/alpha/create_workflow.py @@ -2,7 +2,7 @@ import opengradient as og -og_client = og.new_client(email=None, password=None, private_key=os.environ.get("OG_PRIVATE_KEY")) +og_client = og.Client(private_key=os.environ.get("OG_PRIVATE_KEY")) # Define model input input_query = og.HistoricalInputQuery( diff --git a/examples/alpha/run_embeddings_model.py b/examples/alpha/run_embeddings_model.py index c6347ee..7ed19e6 100644 --- a/examples/alpha/run_embeddings_model.py +++ b/examples/alpha/run_embeddings_model.py @@ -2,7 +2,7 @@ import opengradient as og -og_client = og.new_client(email=None, password=None, private_key=os.environ.get("OG_PRIVATE_KEY")) +og_client = og.Client(private_key=os.environ.get("OG_PRIVATE_KEY")) queries = [ "how much protein should a female eat", @@ -14,7 +14,7 @@ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. 
In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.", ] -model_embeddings = og_client.infer( +model_embeddings = og_client.inference.infer( model_cid="intfloat/multilingual-e5-large-instruct", model_input={"queries": queries, "instruction": instruction, "passages": passages}, inference_mode=og.InferenceMode.VANILLA, diff --git a/examples/alpha/run_inference.py b/examples/alpha/run_inference.py index 3ae3789..a20eab2 100644 --- a/examples/alpha/run_inference.py +++ b/examples/alpha/run_inference.py @@ -2,9 +2,9 @@ import opengradient as og -og_client = og.new_client(email=None, password=None, private_key=os.environ.get("OG_PRIVATE_KEY")) +og_client = og.Client(private_key=os.environ.get("OG_PRIVATE_KEY")) -inference_result = og_client.infer( +inference_result = og_client.inference.infer( model_cid="hJD2Ja3akZFt1A2LT-D_1oxOCz_OtuGYw4V9eE1m39M", model_input={ "open_high_low_close": [ diff --git a/examples/alpha/use_workflow.py b/examples/alpha/use_workflow.py index 4ec9314..c7e2e18 100644 --- a/examples/alpha/use_workflow.py +++ b/examples/alpha/use_workflow.py @@ -2,7 +2,7 @@ import opengradient as og -og_client = og.new_client(email=None, password=None, private_key=os.environ.get("OG_PRIVATE_KEY")) +og_client = og.Client(private_key=os.environ.get("OG_PRIVATE_KEY")) model_output = og_client.alpha.read_workflow_result( # This is the workflow contract address that you previously deployed diff --git a/examples/create_model.py b/examples/create_model.py index 419fd47..7437c01 100644 --- a/examples/create_model.py +++ b/examples/create_model.py @@ -2,12 +2,12 @@ import opengradient as og -og_client = og.new_client( +og_client = og.Client( + private_key=os.environ.get("OG_PRIVATE_KEY"), email=os.environ.get("OG_MODEL_HUB_EMAIL"), password=os.environ.get("OG_MODEL_HUB_PASSWORD"), - private_key=os.environ.get("OG_PRIVATE_KEY"), ) -og_client.create_model( +og_client.model_hub.create_model( model_name="example-model", model_desc="An 
example machine learning model for demonstration purposes", version="1.0.0" ) diff --git a/examples/run_x402_gemini_tools.py b/examples/run_x402_gemini_tools.py index 4ddbc37..619c0b1 100644 --- a/examples/run_x402_gemini_tools.py +++ b/examples/run_x402_gemini_tools.py @@ -11,9 +11,7 @@ import opengradient as og # Initialize client with Google API key -client = og.new_client( - email=None, - password=None, +client = og.Client( private_key=os.environ.get("OG_PRIVATE_KEY"), ) @@ -58,7 +56,7 @@ print(f"Tools: {tools}") print("-" * 50) -result = client.llm_chat( +result = client.llm.chat( model=og.TEE_LLM.GEMINI_2_5_FLASH_LITE, messages=messages, tools=tools, diff --git a/examples/run_x402_llm.py b/examples/run_x402_llm.py index bfe3a77..8926de2 100644 --- a/examples/run_x402_llm.py +++ b/examples/run_x402_llm.py @@ -13,9 +13,7 @@ import opengradient as og -client = og.new_client( - email=None, - password=None, +client = og.Client( private_key=os.environ.get("OG_PRIVATE_KEY"), ) @@ -25,7 +23,7 @@ {"role": "user", "content": "What makes it good for beginners?"}, ] -result = client.llm_chat( +result = client.llm.chat( model=og.TEE_LLM.GPT_4_1_2025_04_14, messages=messages, x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, diff --git a/examples/run_x402_llm_stream.py b/examples/run_x402_llm_stream.py index 6e12c9c..b8f1127 100644 --- a/examples/run_x402_llm_stream.py +++ b/examples/run_x402_llm_stream.py @@ -2,9 +2,7 @@ import opengradient as og -client = og.new_client( - email=None, - password=None, +client = og.Client( private_key=os.environ.get("OG_PRIVATE_KEY"), ) @@ -12,7 +10,7 @@ {"role": "user", "content": "Describe to me the 7 network layers?"}, ] -stream = client.llm_chat( +stream = client.llm.chat( model=og.TEE_LLM.GPT_4_1_2025_04_14, messages=messages, x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, diff --git a/examples/upload_model.py b/examples/upload_model.py index b6ad09f..a1f2822 100644 --- a/examples/upload_model.py +++ 
b/examples/upload_model.py @@ -2,13 +2,13 @@ import opengradient as og -og_client = og.new_client( +og_client = og.Client( + private_key=os.environ.get("OG_PRIVATE_KEY"), email=os.environ.get("OG_MODEL_HUB_EMAIL"), password=os.environ.get("OG_MODEL_HUB_PASSWORD"), - private_key=os.environ.get("OG_PRIVATE_KEY"), ) -model_repo = og_client.create_model(model_name="Demo_Custom_Model_Adam", model_desc="My custom model for demoing Model Hub") -upload_result = og_client.upload(model_name=model_repo.name, version=model_repo.initialVersion, model_path="./path/to/model.onnx") +model_repo = og_client.model_hub.create_model(model_name="Demo_Custom_Model_Adam", model_desc="My custom model for demoing Model Hub") +upload_result = og_client.model_hub.upload(model_name=model_repo.name, version=model_repo.initialVersion, model_path="./path/to/model.onnx") print(f"Uploaded model, use following CID to access: {upload_result.modelCid}") diff --git a/integrationtest/agent/test_agent.py b/integrationtest/agent/test_agent.py index 3611f16..ccc4a5c 100644 --- a/integrationtest/agent/test_agent.py +++ b/integrationtest/agent/test_agent.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, Field import opengradient as og -from opengradient import LLM, InferenceResult, init +from opengradient import LLM, InferenceResult from opengradient.alphasense import ToolType, create_read_workflow_tool, create_run_model_tool from opengradient.llm import OpenGradientChatModel @@ -19,7 +19,7 @@ def setUp(self): if not private_key: raise ValueError("PRIVATE_KEY environment variable is not set") - init(private_key=private_key, email=None, password=None) + self.client = og.Client(private_key=private_key) self.llm = OpenGradientChatModel(private_key=private_key, model_cid=LLM.CLAUDE_3_7_SONNET) def test_simple_completion(self): @@ -39,7 +39,7 @@ def get_balance(): def test_read_workflow(self): # Read current workflow result - workflow_result = 
og.alpha.read_workflow_result(contract_address="0x6e0641925b845A1ca8aA9a890C4DEF388E9197e0") + workflow_result = self.client.alpha.read_workflow_result(contract_address="0x6e0641925b845A1ca8aA9a890C4DEF388E9197e0") expected_result = str(workflow_result.numbers["Y"][0]) btc_workflow_tool = create_read_workflow_tool( @@ -47,6 +47,7 @@ def test_read_workflow(self): workflow_contract_address="0x6e0641925b845A1ca8aA9a890C4DEF388E9197e0", tool_name="ETH_Price_Forecast", tool_description="Reads latest forecast for ETH price", + alpha=self.client.alpha, output_formatter=lambda x: x, ) @@ -86,11 +87,12 @@ def output_formatter(inference_result: InferenceResult): tool_name="One_hour_volatility_ETH_USDT", model_input_provider=model_input_provider, model_output_formatter=output_formatter, + inference=self.client.inference, tool_description="This tool measures the live 1 hour volatility for the trading pair ETH/USDT.", inference_mode=og.InferenceMode.VANILLA, ) - expected_result = og.infer( + expected_result = self.client.inference.infer( inference_mode=og.InferenceMode.VANILLA, model_cid="QmRhcpDXfYCKsimTmJYrAVM4Bbvck59Zb2onj3MHv9Kw5N", model_input=model_input ) formatted_expected_result = format(float(expected_result.model_output["Y"].item()), ".3%") @@ -191,13 +193,14 @@ def output_formatter(inference_result: InferenceResult): tool_name="Return_volatility_tool", model_input_provider=model_input_provider, model_output_formatter=output_formatter, + inference=self.client.inference, tool_input_schema=InputSchema, tool_description="This tool takes a token and measures the return volatility (standard deviation of returns).", inference_mode=og.InferenceMode.VANILLA, ) # Test option ETH - expected_result_eth = og.infer( + expected_result_eth = self.client.inference.infer( inference_mode=og.InferenceMode.VANILLA, model_cid="QmZdSfHWGJyzBiB2K98egzu3MypPcv4R1ASypUxwZ1MFUG", model_input=eth_model_input ) formatted_expected_result_eth = 
format(float(expected_result_eth.model_output["std"].item()), ".3%") @@ -212,7 +215,7 @@ def output_formatter(inference_result: InferenceResult): self.assertIn(formatted_expected_result_eth, list(events)[-1]["messages"][-1].content) # Test option BTC - expected_result_btc = og.infer( + expected_result_btc = self.client.inference.infer( inference_mode=og.InferenceMode.VANILLA, model_cid="QmZdSfHWGJyzBiB2K98egzu3MypPcv4R1ASypUxwZ1MFUG", model_input=btc_model_input ) formatted_expected_result_btc = format(float(expected_result_btc.model_output["std"].item()), ".3%") diff --git a/integrationtest/workflow_models/test_workflow_models.py b/integrationtest/workflow_models/test_workflow_models.py index 5f9ad75..d20009f 100644 --- a/integrationtest/workflow_models/test_workflow_models.py +++ b/integrationtest/workflow_models/test_workflow_models.py @@ -2,7 +2,7 @@ import unittest from dataclasses import dataclass -from opengradient import init, read_workflow_result +import opengradient as og from opengradient.workflow_models import ( read_btc_1_hour_price_forecast, read_eth_1_hour_price_forecast, @@ -37,7 +37,7 @@ def setUp(self): if not private_key: raise ValueError("PRIVATE_KEY environment variable is not set") - init(private_key=private_key, email=None, password=None) + self.client = og.Client(private_key=private_key) def test_models(self): model_functions = { @@ -55,8 +55,10 @@ def test_models(self): } for function, model_info in model_functions.items(): - workflow_result = function() - expected_result = format(float(read_workflow_result(model_info.address).numbers[model_info.output_name].item()), ".10%") + workflow_result = function(self.client.alpha) + expected_result = format( + float(self.client.alpha.read_workflow_result(model_info.address).numbers[model_info.output_name].item()), ".10%" + ) print(function) print("Workflow result: ", workflow_result) assert workflow_result.result == expected_result diff --git a/pyproject.toml b/pyproject.toml index 88712ae..0bcd32c 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,9 @@ exclude = ["tests*", "stresstest*"] "**/*.py" ] +[tool.pytest.ini_options] +pythonpath = ["src"] + [tool.ruff] line-length = 140 target-version = "py310" diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py index e4167f6..cb5f092 100644 --- a/src/opengradient/__init__.py +++ b/src/opengradient/__init__.py @@ -2,12 +2,10 @@ OpenGradient Python SDK for interacting with AI models and infrastructure. """ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Optional -from . import alphasense, llm -from .alpha import _AlphaNamespace +from . import agents, alphasense from .client import Client -from .defaults import DEFAULT_API_URL, DEFAULT_INFERENCE_CONTRACT_ADDRESS, DEFAULT_RPC_URL from .types import ( LLM, TEE_LLM, @@ -25,285 +23,53 @@ x402SettlementMode, ) -# Module-level alpha namespace for workflow/ML execution features (Alpha Testnet only) -alpha = _AlphaNamespace() +global_client: Optional[Client] = None +"""Global client instance. Set by calling :func:`init`.""" -_client = None - -def new_client( - email: Optional[str], - password: Optional[str], +def init( private_key: str, - rpc_url=DEFAULT_RPC_URL, - api_url=DEFAULT_API_URL, - contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS, + email: Optional[str] = None, + password: Optional[str] = None, **kwargs, ) -> Client: - """ - Creates a unique OpenGradient client instance with the given authentication and network settings. 
- - Args: - email: User's email address for authentication with Model Hub - password: User's password for authentication with Model Hub - private_key: Private key for OpenGradient transactions - rpc_url: Optional RPC URL for the blockchain network, defaults to testnet - contract_address: Optional inference contract address - """ - - return Client( - email=email, - password=password, - private_key=private_key, - rpc_url=rpc_url, - api_url=api_url, - contract_address=contract_address, - **kwargs, - ) - - -def init( - email: str, - password: str, - private_key: str, - rpc_url=DEFAULT_RPC_URL, - api_url=DEFAULT_API_URL, - contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS, -): - """Initialize the OpenGradient SDK with authentication and network settings. + """Initialize the global OpenGradient client. Args: - email: User's email address for authentication - password: User's password for authentication - private_key: Ethereum private key for blockchain transactions - rpc_url: Optional RPC URL for the blockchain network, defaults to testnet - api_url: Optional API URL for the OpenGradient API, defaults to testnet - contract_address: Optional inference contract address - """ - global _client - - _client = Client( - private_key=private_key, rpc_url=rpc_url, api_url=api_url, email=email, password=password, contract_address=contract_address - ) - return _client - - -def upload(model_path, model_name, version) -> FileUploadResult: - """Upload a model file to OpenGradient. - - Args: - model_path: Path to the model file on local filesystem - model_name: Name of the model repository - version: Version string for this model upload + private_key: Private key for OpenGradient transactions. + email: Email for Model Hub authentication. Optional. + password: Password for Model Hub authentication. Optional. + **kwargs: Additional arguments forwarded to :class:`Client`. 
Returns: - FileUploadResult: Upload response containing file metadata - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.upload(model_path, model_name, version) - - -def create_model(model_name: str, model_desc: str, model_path: Optional[str] = None) -> ModelRepository: - """Create a new model repository. - - Args: - model_name: Name for the new model repository - model_desc: Description of the model - model_path: Optional path to model file to upload immediately - - Returns: - ModelRepository: Creation response with model metadata and optional upload results - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - - result = _client.create_model(model_name, model_desc) - - if model_path: - version = "0.01" - upload_result = _client.upload(model_path, model_name, version) - result["upload"] = upload_result - - return result - - -def create_version(model_name, notes=None, is_major=False): - """Create a new version for an existing model. - - Args: - model_name: Name of the model repository - notes: Optional release notes for this version - is_major: If True, creates a major version bump instead of minor - - Returns: - dict: Version creation response with version metadata - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.create_version(model_name, notes, is_major) - - -def infer(model_cid, inference_mode, model_input, max_retries: Optional[int] = None) -> InferenceResult: - """Run inference on a model. - - Args: - model_cid: CID of the model to use - inference_mode: Mode of inference (e.g. 
VANILLA) - model_input: Input data for the model - max_retries: Maximum number of retries for failed transactions - - Returns: - InferenceResult (InferenceResult): A dataclass object containing the transaction hash and model output. - * transaction_hash (str): Blockchain hash for the transaction - * model_output (Dict[str, np.ndarray]): Output of the ONNX model - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.infer(model_cid, inference_mode, model_input, max_retries=max_retries) - - -def llm_completion( - model: TEE_LLM, - prompt: str, - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, -) -> TextGenerationOutput: - """Generate text completion using an LLM via TEE. - - Args: - model_cid: CID of the LLM model to use (e.g., 'anthropic/claude-3.5-haiku') - prompt: Text prompt for completion - max_tokens: Maximum tokens to generate - stop_sequence: Optional list of sequences where generation should stop - temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative) - x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode) - - Returns: - TextGenerationOutput: Transaction hash and generated text - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. 
Call og.init() first.") - return _client.llm_completion( - model=model, - prompt=prompt, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - x402_settlement_mode=x402_settlement_mode, - ) - - -def llm_chat( - model: TEE_LLM, - messages: List[Dict], - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - tools: Optional[List[Dict]] = None, - tool_choice: Optional[str] = None, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, - stream: Optional[bool] = False, -) -> Union[TextGenerationOutput, TextGenerationStream]: - """Have a chat conversation with an LLM via TEE. - - Args: - model_cid: CID of the LLM model to use (e.g., 'anthropic/claude-3.5-haiku') - messages: List of chat messages, each with 'role' and 'content' - max_tokens: Maximum tokens to generate - stop_sequence: Optional list of sequences where generation should stop - temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative) - tools: Optional list of tools the model can use - tool_choice: Optional specific tool to use - x402_settlement_mode: Settlement modes for x402 payment protocol transactions (enum x402SettlementMode) - stream: Optional boolean to enable streaming - - Returns: - TextGenerationOutput or TextGenerationStream - - Raises: - RuntimeError: If SDK is not initialized - """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.llm_chat( - model=model, - messages=messages, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - tools=tools, - tool_choice=tool_choice, - x402_settlement_mode=x402_settlement_mode, - stream=stream, - ) - - -def list_files(model_name: str, version: str) -> List[Dict]: - """List files in a model repository version. 
- - Args: - model_name: Name of the model repository - version: Version string to list files from - - Returns: - List[Dict]: List of file metadata dictionaries - - Raises: - RuntimeError: If SDK is not initialized + The newly created :class:`Client` instance. """ - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.list_files(model_name, version) + global global_client + global_client = Client(private_key=private_key, email=email, password=password, **kwargs) + return global_client __all__ = [ - "list_files", - "llm_chat", - "llm_completion", - "infer", - "create_version", - "create_model", - "upload", + "Client", + "global_client", "init", "LLM", "TEE_LLM", - "alpha", "InferenceMode", "HistoricalInputQuery", "SchedulerParams", "CandleType", "CandleOrder", - "llm", + "agents", "alphasense", ] __pdoc__ = { "account": False, "cli": False, - "client": False, + "client": True, "defaults": False, - "exceptions": False, - "llm": True, + "agents": True, "alphasense": True, - "proto": False, - "types": False, - "utils": False, + "types": True, } diff --git a/src/opengradient/llm/__init__.py b/src/opengradient/agents/__init__.py similarity index 100% rename from src/opengradient/llm/__init__.py rename to src/opengradient/agents/__init__.py diff --git a/src/opengradient/llm/og_langchain.py b/src/opengradient/agents/og_langchain.py similarity index 89% rename from src/opengradient/llm/og_langchain.py rename to src/opengradient/agents/og_langchain.py index 9a5065a..cc25c33 100644 --- a/src/opengradient/llm/og_langchain.py +++ b/src/opengradient/agents/og_langchain.py @@ -1,3 +1,4 @@ +# mypy: ignore-errors import json from typing import Any, Callable, Dict, List, Optional, Sequence, Union @@ -20,9 +21,8 @@ from langchain_core.tools import BaseTool from typing_extensions import override -from opengradient.client import Client -from opengradient.defaults import DEFAULT_API_URL, 
DEFAULT_INFERENCE_CONTRACT_ADDRESS, DEFAULT_RPC_URL -from opengradient.types import LLM +from ..client import Client +from ..types import LLM class OpenGradientChatModel(BaseChatModel): @@ -36,14 +36,7 @@ class OpenGradientChatModel(BaseChatModel): def __init__(self, private_key: str, model_cid: LLM, max_tokens: int = 300): super().__init__() - self._client = Client( - private_key=private_key, - rpc_url=DEFAULT_RPC_URL, - api_url=DEFAULT_API_URL, - contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS, - email=None, - password=None, - ) + self._client = Client(private_key=private_key) self._model_cid = model_cid self._max_tokens = max_tokens @@ -112,8 +105,8 @@ def _generate( else: raise ValueError(f"Unexpected message type: {message}") - chat_output = self._client.llm_chat( - model_cid=self._model_cid, + chat_output = self._client.llm.chat( + model=self._model_cid, messages=sdk_messages, stop_sequence=stop, max_tokens=self._max_tokens, diff --git a/src/opengradient/llm/og_openai.py b/src/opengradient/agents/og_openai.py similarity index 88% rename from src/opengradient/llm/og_openai.py rename to src/opengradient/agents/og_openai.py index ce00f34..2a41043 100644 --- a/src/opengradient/llm/og_openai.py +++ b/src/opengradient/agents/og_openai.py @@ -1,12 +1,12 @@ +# mypy: ignore-errors import time import uuid from typing import List from openai.types.chat import ChatCompletion -from opengradient.client import Client -from opengradient.defaults import DEFAULT_INFERENCE_CONTRACT_ADDRESS, DEFAULT_RPC_URL -from opengradient.types import LLM +from ..client import Client +from ..types import LLM class OGCompletions(object): @@ -27,8 +27,8 @@ def create( # convert OpenAI message format so it's compatible with the SDK sdk_messages = OGCompletions.convert_to_abi_compatible(messages) - chat_output = self.client.llm_chat( - model_cid=model, + chat_output = self.client.llm.chat( + model=model, messages=sdk_messages, max_tokens=200, tools=tools, @@ -109,7 +109,5 @@ class 
OpenGradientOpenAIClient(object): chat: OGChat def __init__(self, private_key: str): - self.client = Client( - private_key=private_key, rpc_url=DEFAULT_RPC_URL, contract_address=DEFAULT_INFERENCE_CONTRACT_ADDRESS, email=None, password=None - ) + self.client = Client(private_key=private_key) self.chat = OGChat(self.client) diff --git a/src/opengradient/alpha.py b/src/opengradient/alpha.py deleted file mode 100644 index 930f48b..0000000 --- a/src/opengradient/alpha.py +++ /dev/null @@ -1,368 +0,0 @@ -""" -Alpha Testnet features for OpenGradient SDK. - -This module contains features that are only available on the Alpha Testnet, -including workflow management and ML model execution. -""" - -import json -from pathlib import Path -from typing import TYPE_CHECKING, List, Optional - -from web3 import Web3 -from web3.exceptions import ContractLogicError - -from .defaults import DEFAULT_SCHEDULER_ADDRESS -from .types import HistoricalInputQuery, ModelOutput, SchedulerParams -from .utils import convert_array_to_model_output - -if TYPE_CHECKING: - from .client import Client - -# How much time we wait for txn to be included in chain -INFERENCE_TX_TIMEOUT = 120 -REGULAR_TX_TIMEOUT = 30 - - -class Alpha: - """ - Alpha Testnet features namespace. - - This class provides access to features that are only available on the Alpha Testnet, - including workflow deployment and execution. - - Usage: - client = og.new_client(...) 
- result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) - """ - - def __init__(self, client: "Client"): - self._client = client - - def _get_abi(self, abi_name: str) -> dict: - """Returns the ABI for the requested contract.""" - abi_path = Path(__file__).parent / "abi" / abi_name - with open(abi_path, "r") as f: - return json.load(f) - - def _get_bin(self, bin_name: str) -> str: - """Returns the bin for the requested contract.""" - bin_path = Path(__file__).parent / "bin" / bin_name - with open(bin_path, "r", encoding="utf-8") as f: - bytecode = f.read().strip() - if not bytecode.startswith("0x"): - bytecode = "0x" + bytecode - return bytecode - - def new_workflow( - self, - model_cid: str, - input_query: HistoricalInputQuery, - input_tensor_name: str, - scheduler_params: Optional[SchedulerParams] = None, - ) -> str: - """ - Deploy a new workflow contract with the specified parameters. - - This function deploys a new workflow contract on OpenGradient that connects - an AI model with its required input data. When executed, the workflow will fetch - the specified model, evaluate the input query to get data, and perform inference. - - The workflow can be set to execute manually or automatically via a scheduler. - - Args: - model_cid (str): CID of the model to be executed from the Model Hub - input_query (HistoricalInputQuery): Input definition for the model inference, - will be evaluated at runtime for each inference - input_tensor_name (str): Name of the input tensor expected by the model - scheduler_params (Optional[SchedulerParams]): Scheduler configuration for automated execution: - - frequency: Execution frequency in seconds - - duration_hours: How long the schedule should live for - - Returns: - str: Deployed contract address. If scheduler_params was provided, the workflow - will be automatically executed according to the specified schedule. 
- - Raises: - Exception: If transaction fails or gas estimation fails - """ - from .client import run_with_retry - - # Get contract ABI and bytecode - abi = self._get_abi("PriceHistoryInference.abi") - bytecode = self._get_bin("PriceHistoryInference.bin") - - def deploy_transaction(): - contract = self._client._blockchain.eth.contract(abi=abi, bytecode=bytecode) - query_tuple = input_query.to_abi_format() - constructor_args = [model_cid, input_tensor_name, query_tuple] - - try: - # Estimate gas needed - estimated_gas = contract.constructor(*constructor_args).estimate_gas({"from": self._client._wallet_account.address}) - gas_limit = int(estimated_gas * 1.2) - except Exception as e: - print(f"⚠️ Gas estimation failed: {str(e)}") - gas_limit = 5000000 # Conservative fallback - print(f"📊 Using fallback gas limit: {gas_limit}") - - transaction = contract.constructor(*constructor_args).build_transaction( - { - "from": self._client._wallet_account.address, - "nonce": self._client._blockchain.eth.get_transaction_count(self._client._wallet_account.address, "pending"), - "gas": gas_limit, - "gasPrice": self._client._blockchain.eth.gas_price, - "chainId": self._client._blockchain.eth.chain_id, - } - ) - - signed_txn = self._client._wallet_account.sign_transaction(transaction) - tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction) - - tx_receipt = self._client._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=60) - - if tx_receipt["status"] == 0: - raise Exception(f"❌ Contract deployment failed, transaction hash: {tx_hash.hex()}") - - return tx_receipt.contractAddress - - contract_address = run_with_retry(deploy_transaction) - - if scheduler_params: - self._register_with_scheduler(contract_address, scheduler_params) - - return contract_address - - def _register_with_scheduler(self, contract_address: str, scheduler_params: SchedulerParams) -> None: - """ - Register the deployed workflow contract with the scheduler for automated 
execution. - - Args: - contract_address (str): Address of the deployed workflow contract - scheduler_params (SchedulerParams): Scheduler configuration containing: - - frequency: Execution frequency in seconds - - duration_hours: How long to run in hours - - end_time: Unix timestamp when scheduling should end - - Raises: - Exception: If registration with scheduler fails. The workflow contract will - still be deployed and can be executed manually. - """ - scheduler_abi = self._get_abi("WorkflowScheduler.abi") - - # Scheduler contract address - scheduler_address = DEFAULT_SCHEDULER_ADDRESS - scheduler_contract = self._client._blockchain.eth.contract(address=scheduler_address, abi=scheduler_abi) - - try: - # Register the workflow with the scheduler - scheduler_tx = scheduler_contract.functions.registerTask( - contract_address, scheduler_params.end_time, scheduler_params.frequency - ).build_transaction( - { - "from": self._client._wallet_account.address, - "gas": 300000, - "gasPrice": self._client._blockchain.eth.gas_price, - "nonce": self._client._blockchain.eth.get_transaction_count(self._client._wallet_account.address, "pending"), - "chainId": self._client._blockchain.eth.chain_id, - } - ) - - signed_scheduler_tx = self._client._wallet_account.sign_transaction(scheduler_tx) - scheduler_tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_scheduler_tx.raw_transaction) - self._client._blockchain.eth.wait_for_transaction_receipt(scheduler_tx_hash, timeout=REGULAR_TX_TIMEOUT) - except Exception as e: - print(f"❌ Error registering contract with scheduler: {str(e)}") - print(" The workflow contract is still deployed and can be executed manually.") - - def read_workflow_result(self, contract_address: str) -> ModelOutput: - """ - Reads the latest inference result from a deployed workflow contract. 
- - Args: - contract_address (str): Address of the deployed workflow contract - - Returns: - ModelOutput: The inference result from the contract - - Raises: - ContractLogicError: If the transaction fails - Web3Error: If there are issues with the web3 connection or contract interaction - """ - # Get the contract interface - contract = self._client._blockchain.eth.contract( - address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi") - ) - - # Get the result - result = contract.functions.getInferenceResult().call() - - return convert_array_to_model_output(result) - - def run_workflow(self, contract_address: str) -> ModelOutput: - """ - Triggers the run() function on a deployed workflow contract and returns the result. - - Args: - contract_address (str): Address of the deployed workflow contract - - Returns: - ModelOutput: The inference result from the contract - - Raises: - ContractLogicError: If the transaction fails - Web3Error: If there are issues with the web3 connection or contract interaction - """ - # Get the contract interface - contract = self._client._blockchain.eth.contract( - address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi") - ) - - # Call run() function - nonce = self._client._blockchain.eth.get_transaction_count(self._client._wallet_account.address, "pending") - - run_function = contract.functions.run() - transaction = run_function.build_transaction( - { - "from": self._client._wallet_account.address, - "nonce": nonce, - "gas": 30000000, - "gasPrice": self._client._blockchain.eth.gas_price, - "chainId": self._client._blockchain.eth.chain_id, - } - ) - - signed_txn = self._client._wallet_account.sign_transaction(transaction) - tx_hash = self._client._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction) - tx_receipt = self._client._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT) - - if tx_receipt.status == 0: - raise 
ContractLogicError(f"Run transaction failed. Receipt: {tx_receipt}") - - # Get the inference result from the contract - result = contract.functions.getInferenceResult().call() - - return convert_array_to_model_output(result) - - def read_workflow_history(self, contract_address: str, num_results: int) -> List[ModelOutput]: - """ - Gets historical inference results from a workflow contract. - - Retrieves the specified number of most recent inference results from the contract's - storage, with the most recent result first. - - Args: - contract_address (str): Address of the deployed workflow contract - num_results (int): Number of historical results to retrieve - - Returns: - List[ModelOutput]: List of historical inference results - """ - contract = self._client._blockchain.eth.contract( - address=Web3.to_checksum_address(contract_address), abi=self._get_abi("PriceHistoryInference.abi") - ) - - results = contract.functions.getLastInferenceResults(num_results).call() - return [convert_array_to_model_output(result) for result in results] - - -class _AlphaNamespace: - """ - Module-level alpha namespace for use with og.init(). - - Usage: - og.init(...) - result = og.alpha.new_workflow(model_cid, input_query, input_tensor_name) - """ - - def new_workflow( - self, - model_cid: str, - input_query: HistoricalInputQuery, - input_tensor_name: str, - scheduler_params: Optional[SchedulerParams] = None, - ) -> str: - """ - Deploy a new workflow contract with the specified parameters. - - This function deploys a new workflow contract and optionally registers it with - the scheduler for automated execution. If scheduler_params is not provided, - the workflow will be deployed without automated execution scheduling. 
- - Args: - model_cid: IPFS CID of the model - input_query: HistoricalInputQuery containing query parameters - input_tensor_name: Name of the input tensor - scheduler_params: Optional scheduler configuration as SchedulerParams instance - If not provided, the workflow will be deployed without scheduling. - - Returns: - str: Deployed contract address. If scheduler_params was provided, the workflow - will be automatically executed according to the specified schedule. - """ - from . import _client - - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init(...) first.") - - return _client.alpha.new_workflow( - model_cid=model_cid, - input_query=input_query, - input_tensor_name=input_tensor_name, - scheduler_params=scheduler_params, - ) - - def read_workflow_result(self, contract_address: str) -> ModelOutput: - """ - Reads the latest inference result from a deployed workflow contract. - - This function retrieves the most recent output from a deployed model executor contract. - It includes built-in retry logic to handle blockchain state delays. - - Args: - contract_address (str): Address of the deployed workflow contract - - Returns: - ModelOutput: The inference result from the contract - - Raises: - RuntimeError: If OpenGradient client is not initialized - """ - from . import _client - - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.alpha.read_workflow_result(contract_address) - - def run_workflow(self, contract_address: str) -> ModelOutput: - """ - Executes the workflow by calling run() on the contract to pull latest data and perform inference. - - Args: - contract_address (str): Address of the deployed workflow contract - - Returns: - ModelOutput: The inference result from the contract - """ - from . import _client - - if _client is None: - raise RuntimeError("OpenGradient client not initialized. 
Call og.init() first.") - return _client.alpha.run_workflow(contract_address) - - def read_workflow_history(self, contract_address: str, num_results: int) -> List[ModelOutput]: - """ - Gets historical inference results from a workflow contract. - - Args: - contract_address (str): Address of the deployed workflow contract - num_results (int): Number of historical results to retrieve - - Returns: - List[ModelOutput]: List of historical inference results - """ - from . import _client - - if _client is None: - raise RuntimeError("OpenGradient client not initialized. Call og.init() first.") - return _client.alpha.read_workflow_history(contract_address, num_results) diff --git a/src/opengradient/alphasense/read_workflow_tool.py b/src/opengradient/alphasense/read_workflow_tool.py index 262baca..d440aba 100644 --- a/src/opengradient/alphasense/read_workflow_tool.py +++ b/src/opengradient/alphasense/read_workflow_tool.py @@ -1,9 +1,8 @@ -from typing import Callable +from typing import Callable, Optional from langchain_core.tools import BaseTool, StructuredTool -import opengradient as og - +from ..client.alpha import Alpha from .types import ToolType @@ -12,6 +11,7 @@ def create_read_workflow_tool( workflow_contract_address: str, tool_name: str, tool_description: str, + alpha: Optional[Alpha] = None, output_formatter: Callable[..., str] = lambda x: x, ) -> BaseTool | Callable: """ @@ -31,6 +31,8 @@ def create_read_workflow_tool( identify and invoke the tool within the agent. tool_description (str): A description of what the tool does and how it processes the workflow results. + alpha (Alpha, optional): The alpha namespace from an initialized OpenGradient client + (client.alpha). If not provided, falls back to the global client set via ``opengradient.init()``. output_formatter (Callable[..., str], optional): A function that takes the workflow output and formats it into a string. This ensures the output is compatible with the tool framework. Default returns string as is. 
@@ -50,22 +52,25 @@ def create_read_workflow_tool( ... tool_type=ToolType.LANGCHAIN, ... workflow_contract_address="0x123...", ... tool_name="workflow_reader", - ... output_formatter=format_output, - ... tool_description="Reads and formats workflow execution results" - ... ) - >>> # Create a Swarm tool - >>> swarm_tool = create_read_workflow_tool( - ... tool_type=ToolType.SWARM, - ... workflow_contract_address="0x123...", - ... tool_name="workflow_reader", + ... alpha=client.alpha, ... output_formatter=format_output, ... tool_description="Reads and formats workflow execution results" ... ) """ + if alpha is None: + import opengradient as og + + if og.global_client is None: + raise ValueError( + "No alpha instance provided and no global client initialized. " + "Either pass alpha=client.alpha or call opengradient.init() first." + ) + alpha = og.global_client.alpha + # define runnable def read_workflow(): - output = og.alpha.read_workflow_result(contract_address=workflow_contract_address) + output = alpha.read_workflow_result(contract_address=workflow_contract_address) return output_formatter(output) if tool_type == ToolType.LANGCHAIN: diff --git a/src/opengradient/alphasense/run_model_tool.py b/src/opengradient/alphasense/run_model_tool.py index 2bb41f8..92a6485 100644 --- a/src/opengradient/alphasense/run_model_tool.py +++ b/src/opengradient/alphasense/run_model_tool.py @@ -4,9 +4,8 @@ from langchain_core.tools import BaseTool, StructuredTool from pydantic import BaseModel -import opengradient as og -from opengradient.types import InferenceMode, InferenceResult - +from ..client.onchain_inference import Inference +from ..types import InferenceMode, InferenceResult from .types import ToolType @@ -16,6 +15,7 @@ def create_run_model_tool( tool_name: str, model_input_provider: Callable[..., Dict[str, Union[str, int, float, List, np.ndarray]]], model_output_formatter: Callable[[InferenceResult], str], + inference: Optional[Inference] = None, tool_input_schema: 
Optional[Type[BaseModel]] = None, tool_description: str = "Executes the given ML model", inference_mode: InferenceMode = InferenceMode.VANILLA, @@ -49,6 +49,8 @@ def create_run_model_tool( InferenceResult has attributes: * transaction_hash (str): Blockchain hash for the transaction * model_output (Dict[str, np.ndarray]): Output of the ONNX model + inference (Inference, optional): The inference namespace from an initialized OpenGradient client + (client.inference). If not provided, falls back to the global client set via ``opengradient.init()``. tool_input_schema (Type[BaseModel], optional): A Pydantic BaseModel class defining the input schema. @@ -61,7 +63,7 @@ def create_run_model_tool( Default is None -- an empty schema will be provided for LangChain. tool_description (str, optional): A description of what the tool does. Defaults to "Executes the given ML model". - inference_mode (og.InferenceMode, optional): The inference mode to use when running + inference_mode (InferenceMode, optional): The inference mode to use when running the model. Defaults to VANILLA. Returns: @@ -102,17 +104,28 @@ def create_run_model_tool( ... tool_name="Return_volatility_tool", ... model_input_provider=model_input_provider, ... model_output_formatter=output_formatter, + ... inference=client.inference, ... tool_input_schema=InputSchema, ... tool_description="This tool takes a token and measures the return volatility (standard deviation of returns).", ... inference_mode=og.InferenceMode.VANILLA, ... ) """ + if inference is None: + import opengradient as og + + if og.global_client is None: + raise ValueError( + "No inference instance provided and no global client initialized. " + "Either pass inference=client.inference or call opengradient.init() first." 
+ ) + inference = og.global_client.inference + def model_executor(**llm_input): # Pass LLM input arguments (formatted based on tool_input_schema) as parameters into model_input_provider model_input = model_input_provider(**llm_input) - inference_result = og.infer(model_cid=model_cid, inference_mode=inference_mode, model_input=model_input) + inference_result = inference.infer(model_cid=model_cid, inference_mode=inference_mode, model_input=model_input) return model_output_formatter(inference_result) diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index 7e0228e..9c2ab87 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -324,7 +324,9 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path model_input = json.load(file) click.echo(f'Running {inference_mode} inference for model "{model_cid}"') - inference_result = client.infer(model_cid=model_cid, inference_mode=InferenceModes[inference_mode], model_input=model_input) + inference_result = client.inference.infer( + model_cid=model_cid, inference_mode=InferenceModes[inference_mode], model_input=model_input + ) click.echo() # Add a newline for better spacing click.secho("✅ Transaction successful", fg="green", bold=True) @@ -394,8 +396,8 @@ def completion( try: click.echo(f'Running TEE LLM completion for model "{model_cid}"\n') - completion_output = client.llm_completion( - model_cid=model_cid, + completion_output = client.llm.completion( + model=model_cid, prompt=prompt, max_tokens=max_tokens, stop_sequence=list(stop_sequence), @@ -552,8 +554,8 @@ def chat( if not tools and not tools_file: parsed_tools = None - result = client.llm_chat( - model_cid=model_cid, + result = client.llm.chat( + model=model_cid, messages=messages, max_tokens=max_tokens, stop_sequence=list(stop_sequence), diff --git a/src/opengradient/client.py b/src/opengradient/client.py deleted file mode 100644 index a2863a7..0000000 --- a/src/opengradient/client.py +++ /dev/null @@ -1,997 +0,0 @@ 
-import asyncio -import base64 -import json -import os -import time -import urllib.parse -from pathlib import Path -from typing import Callable, Dict, List, Optional, Union - -import firebase -import httpx -import numpy as np -import requests -from eth_account.account import LocalAccount -from web3 import Web3 -from web3.exceptions import ContractLogicError -from web3.logs import DISCARD -from x402.clients.base import x402Client -from x402.clients.httpx import x402HttpxClient - -from .defaults import ( - DEFAULT_NETWORK_FILTER, - DEFAULT_OPENGRADIENT_LLM_SERVER_URL, - DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, -) -from .exceptions import OpenGradientError -from .types import ( - TEE_LLM, - FileUploadResult, - InferenceMode, - InferenceResult, - ModelRepository, - StreamChunk, - TextGenerationOutput, - TextGenerationStream, - x402SettlementMode, -) -from .utils import convert_to_model_input, convert_to_model_output -from .x402_auth import X402Auth - -# Security Update: Credentials moved to environment variables -_FIREBASE_CONFIG = { - "apiKey": os.getenv("FIREBASE_API_KEY"), - "authDomain": os.getenv("FIREBASE_AUTH_DOMAIN"), - "projectId": os.getenv("FIREBASE_PROJECT_ID"), - "storageBucket": os.getenv("FIREBASE_STORAGE_BUCKET"), - "appId": os.getenv("FIREBASE_APP_ID"), - "databaseURL": os.getenv("FIREBASE_DATABASE_URL", ""), -} - -# How much time we wait for txn to be included in chain -LLM_TX_TIMEOUT = 60 -INFERENCE_TX_TIMEOUT = 120 -REGULAR_TX_TIMEOUT = 30 - -# How many times we retry a transaction because of nonce conflict -DEFAULT_MAX_RETRY = 5 -DEFAULT_RETRY_DELAY_SEC = 1 - -PRECOMPILE_CONTRACT_ADDRESS = "0x00000000000000000000000000000000000000F4" - -X402_PROCESSING_HASH_HEADER = "x-processing-hash" -X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" - -TIMEOUT = httpx.Timeout( - timeout=90.0, - connect=15.0, - read=15.0, - write=30.0, - pool=10.0, -) -LIMITS = httpx.Limits( - max_keepalive_connections=100, - 
max_connections=500, - keepalive_expiry=60 * 20, # 20 minutes -) - - -class Client: - _inference_hub_contract_address: str - _blockchain: Web3 - _wallet_account: LocalAccount - - _hub_user: Optional[Dict] - _api_url: str - _inference_abi: Dict - _precompile_abi: Dict - - def __init__( - self, - private_key: str, - rpc_url: str, - api_url: str, - contract_address: str, - email: Optional[str] = None, - password: Optional[str] = None, - og_llm_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_SERVER_URL, - og_llm_streaming_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, - ): - """ - Initialize the Client with private key, RPC URL, and contract address. - - Args: - private_key (str): The private key for the wallet. - rpc_url (str): The RPC URL for the Ethereum node. - contract_address (str): The contract address for the smart contract. - email (str, optional): Email for authentication. Defaults to "test@test.com". - password (str, optional): Password for authentication. Defaults to "Test-123". - """ - self._inference_hub_contract_address = contract_address - self._blockchain = Web3(Web3.HTTPProvider(rpc_url)) - self._api_url = api_url - self._wallet_account = self._blockchain.eth.account.from_key(private_key) - - abi_path = Path(__file__).parent / "abi" / "inference.abi" - with open(abi_path, "r") as abi_file: - self._inference_abi = json.load(abi_file) - - abi_path = Path(__file__).parent / "abi" / "InferencePrecompile.abi" - with open(abi_path, "r") as abi_file: - self._precompile_abi = json.load(abi_file) - - if email is not None: - self._hub_user = self._login_to_hub(email, password) - else: - self._hub_user = None - - self._og_llm_server_url = og_llm_server_url - self._og_llm_streaming_server_url = og_llm_streaming_server_url - - self._alpha = None # Lazy initialization for alpha namespace - - @property - def alpha(self): - """ - Access Alpha Testnet features. 
- - Returns: - Alpha: Alpha namespace with workflow and ML model execution methods. - - Example: - client = og.new_client(...) - result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) - """ - if self._alpha is None: - from .alpha import Alpha - - self._alpha = Alpha(self) - return self._alpha - - def _login_to_hub(self, email, password): - if not _FIREBASE_CONFIG.get("apiKey"): - raise ValueError("Firebase API Key is missing in environment variables") - - firebase_app = firebase.initialize_app(_FIREBASE_CONFIG) - return firebase_app.auth().sign_in_with_email_and_password(email, password) - - def create_model(self, model_name: str, model_desc: str, version: str = "1.00") -> ModelRepository: - """ - Create a new model with the given model_name and model_desc, and a specified version. - - Args: - model_name (str): The name of the model. - model_desc (str): The description of the model. - version (str): The version identifier (default is "1.00"). - - Returns: - dict: The server response containing model details. - - Raises: - CreateModelError: If the model creation fails. - """ - if not self._hub_user: - raise ValueError("User not authenticated") - - url = "https://api.opengradient.ai/api/v0/models/" - headers = {"Authorization": f"Bearer {self._hub_user['idToken']}", "Content-Type": "application/json"} - payload = {"name": model_name, "description": model_desc} - - try: - response = requests.post(url, json=payload, headers=headers) - response.raise_for_status() - except requests.HTTPError as e: - error_details = f"HTTP {e.response.status_code}: {e.response.text}" - raise OpenGradientError(f"Model creation failed: {error_details}") from e - - json_response = response.json() - model_name = json_response.get("name") - if not model_name: - raise Exception(f"Model creation response missing 'name'. 
Full response: {json_response}") - - # Create the specified version for the newly created model - version_response = self.create_version(model_name, version) - - return ModelRepository(model_name, version_response["versionString"]) - - def create_version(self, model_name: str, notes: str = "", is_major: bool = False) -> dict: - """ - Create a new version for the specified model. - - Args: - model_name (str): The unique identifier for the model. - notes (str, optional): Notes for the new version. - is_major (bool, optional): Whether this is a major version update. Defaults to False. - - Returns: - dict: The server response containing version details. - - Raises: - Exception: If the version creation fails. - """ - if not self._hub_user: - raise ValueError("User not authenticated") - - url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions" - headers = {"Authorization": f"Bearer {self._hub_user['idToken']}", "Content-Type": "application/json"} - payload = {"notes": notes, "is_major": is_major} - - try: - response = requests.post(url, json=payload, headers=headers, allow_redirects=False) - response.raise_for_status() - - json_response = response.json() - - if isinstance(json_response, list) and not json_response: - return {"versionString": "Unknown", "note": "Created based on empty response"} - elif isinstance(json_response, dict): - version_string = json_response.get("versionString") - if not version_string: - return {"versionString": "Unknown", "note": "Version ID not provided in response"} - return {"versionString": version_string} - else: - raise Exception(f"Unexpected response type: {type(json_response)}") - - except requests.RequestException as e: - raise Exception(f"Version creation failed: {str(e)}") - except Exception: - raise - - def upload(self, model_path: str, model_name: str, version: str) -> FileUploadResult: - """ - Upload a model file to the server. - - Args: - model_path (str): The path to the model file. 
- model_name (str): The unique identifier for the model. - version (str): The version identifier for the model. - - Returns: - dict: The processed result. - - Raises: - OpenGradientError: If the upload fails. - """ - from requests_toolbelt import MultipartEncoder - - if not self._hub_user: - raise ValueError("User not authenticated") - - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model file not found: {model_path}") - - url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions/{version}/files" - headers = {"Authorization": f"Bearer {self._hub_user['idToken']}"} - - try: - with open(model_path, "rb") as file: - encoder = MultipartEncoder(fields={"file": (os.path.basename(model_path), file, "application/octet-stream")}) - headers["Content-Type"] = encoder.content_type - - response = requests.post(url, data=encoder, headers=headers, timeout=3600) - - if response.status_code == 201: - if response.content and response.content != b"null": - json_response = response.json() - return FileUploadResult(json_response.get("ipfsCid"), json_response.get("size")) - else: - raise RuntimeError("Empty or null response content received") - elif response.status_code == 500: - raise OpenGradientError("Internal server error occurred", status_code=500) - else: - error_message = response.json().get("detail", "Unknown error occurred") - raise OpenGradientError(f"Upload failed: {error_message}", status_code=response.status_code) - - except requests.RequestException as e: - raise OpenGradientError(f"Upload failed: {str(e)}") - except OpenGradientError: - raise - except Exception as e: - raise OpenGradientError(f"Unexpected error during upload: {str(e)}") - - def infer( - self, - model_cid: str, - inference_mode: InferenceMode, - model_input: Dict[str, Union[str, int, float, List, np.ndarray]], - max_retries: Optional[int] = None, - ) -> InferenceResult: - """ - Perform inference on a model. 
- - Args: - model_cid (str): The unique content identifier for the model from IPFS. - inference_mode (InferenceMode): The inference mode. - model_input (Dict[str, Union[str, int, float, List, np.ndarray]]): The input data for the model. - max_retries (int, optional): Maximum number of retry attempts. Defaults to 5. - - Returns: - InferenceResult (InferenceResult): A dataclass object containing the transaction hash and model output. - transaction_hash (str): Blockchain hash for the transaction - model_output (Dict[str, np.ndarray]): Output of the ONNX model - - Raises: - OpenGradientError: If the inference fails. - """ - - def execute_transaction(): - contract = self._blockchain.eth.contract(address=self._inference_hub_contract_address, abi=self._inference_abi) - precompile_contract = self._blockchain.eth.contract(address=PRECOMPILE_CONTRACT_ADDRESS, abi=self._precompile_abi) - - inference_mode_uint8 = inference_mode.value - converted_model_input = convert_to_model_input(model_input) - - run_function = contract.functions.run(model_cid, inference_mode_uint8, converted_model_input) - - tx_hash, tx_receipt = self._send_tx_with_revert_handling(run_function) - parsed_logs = contract.events.InferenceResult().process_receipt(tx_receipt, errors=DISCARD) - if len(parsed_logs) < 1: - raise OpenGradientError("InferenceResult event not found in transaction logs") - - # TODO: This should return a ModelOutput class object - model_output = convert_to_model_output(parsed_logs[0]["args"]) - if len(model_output) == 0: - # check inference directly from node - parsed_logs = precompile_contract.events.ModelInferenceEvent().process_receipt(tx_receipt, errors=DISCARD) - inference_id = parsed_logs[0]["args"]["inferenceID"] - inference_result = self._get_inference_result_from_node(inference_id, inference_mode) - model_output = convert_to_model_output(inference_result) - - return InferenceResult(tx_hash.hex(), model_output) - - return run_with_retry(execute_transaction, max_retries) - - def 
_og_payment_selector(self, accepts, network_filter=DEFAULT_NETWORK_FILTER, scheme_filter=None, max_value=None): - """Custom payment selector for OpenGradient network.""" - return x402Client.default_payment_requirements_selector( - accepts, - network_filter=network_filter, - scheme_filter=scheme_filter, - max_value=max_value, - ) - - def llm_completion( - self, - model: TEE_LLM, - prompt: str, - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, - ) -> TextGenerationOutput: - """ - Perform inference on an LLM model using completions via TEE. - - Args: - model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). - prompt (str): The input prompt for the LLM. - max_tokens (int): Maximum number of tokens for LLM output. Default is 100. - stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None. - temperature (float): Temperature for LLM inference, between 0 and 1. Default is 0.0. - x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. - - Returns: - TextGenerationOutput: Generated text results including: - - Transaction hash ("external" for TEE providers) - - String of completion output - - Payment hash for x402 transactions - - Raises: - OpenGradientError: If the inference fails. 
- """ - return self._tee_llm_completion( - model=model.split("/")[1], - prompt=prompt, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - x402_settlement_mode=x402_settlement_mode, - ) - - def _tee_llm_completion( - self, - model: str, - prompt: str, - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, - ) -> TextGenerationOutput: - """ - Route completion request to OpenGradient TEE LLM server with x402 payments. - - Args: - model: Model identifier - prompt: Input prompt - max_tokens: Maximum tokens to generate - stop_sequence: Stop sequences - temperature: Sampling temperature - x402_settlement_mode: Settlement mode for x402 payments - - Returns: - TextGenerationOutput with completion - - Raises: - OpenGradientError: If request fails - """ - - async def make_request(): - # Security Fix: verify=True enabled - async with x402HttpxClient( - account=self._wallet_account, - base_url=self._og_llm_server_url, - payment_requirements_selector=self._og_payment_selector, - verify=True, - ) as client: - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", - "X-SETTLEMENT-TYPE": x402_settlement_mode, - } - - payload = { - "model": model, - "prompt": prompt, - "max_tokens": max_tokens, - "temperature": temperature, - } - - if stop_sequence: - payload["stop"] = stop_sequence - - try: - response = await client.post("/v1/completions", json=payload, headers=headers, timeout=60) - - # Read the response content - content = await response.aread() - result = json.loads(content.decode()) - payment_hash = "" - - if X402_PROCESSING_HASH_HEADER in response.headers: - payment_hash = response.headers[X402_PROCESSING_HASH_HEADER] - - return TextGenerationOutput( - transaction_hash="external", completion_output=result.get("completion"), payment_hash=payment_hash - ) - - except 
Exception as e: - raise OpenGradientError(f"TEE LLM completion request failed: {str(e)}") - - try: - return asyncio.run(make_request()) - except OpenGradientError: - raise - except Exception as e: - raise OpenGradientError(f"TEE LLM completion failed: {str(e)}") - - def llm_chat( - self, - model: TEE_LLM, - messages: List[Dict], - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - tools: Optional[List[Dict]] = [], - tool_choice: Optional[str] = None, - x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, - stream: bool = False, - ) -> Union[TextGenerationOutput, TextGenerationStream]: - """ - Perform inference on an LLM model using chat via TEE. - - Args: - model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). - messages (List[Dict]): The messages that will be passed into the chat. - max_tokens (int): Maximum number of tokens for LLM output. Default is 100. - stop_sequence (List[str], optional): List of stop sequences for LLM. - temperature (float): Temperature for LLM inference, between 0 and 1. - tools (List[dict], optional): Set of tools for function calling. - tool_choice (str, optional): Sets a specific tool to choose. - x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - - SETTLE: Records input/output hashes only (most privacy-preserving). - - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). - - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. - Defaults to SETTLE_BATCH. - stream (bool, optional): Whether to stream the response. Default is False. 
- - Returns: - Union[TextGenerationOutput, TextGenerationStream]: - - If stream=False: TextGenerationOutput with chat_output, transaction_hash, finish_reason, and payment_hash - - If stream=True: TextGenerationStream yielding StreamChunk objects with typed deltas (true streaming via threading) - - Raises: - OpenGradientError: If the inference fails. - """ - if stream: - # Use threading bridge for true sync streaming - return self._tee_llm_chat_stream_sync( - model=model.split("/")[1], - messages=messages, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - tools=tools, - tool_choice=tool_choice, - x402_settlement_mode=x402_settlement_mode, - ) - else: - # Non-streaming - return self._tee_llm_chat( - model=model.split("/")[1], - messages=messages, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - tools=tools, - tool_choice=tool_choice, - x402_settlement_mode=x402_settlement_mode, - ) - - def _tee_llm_chat( - self, - model: str, - messages: List[Dict], - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - tools: Optional[List[Dict]] = None, - tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, - ) -> TextGenerationOutput: - """ - Route chat request to OpenGradient TEE LLM server with x402 payments. 
- - Args: - model: Model identifier - messages: List of chat messages - max_tokens: Maximum tokens to generate - stop_sequence: Stop sequences - temperature: Sampling temperature - tools: Function calling tools - tool_choice: Tool selection strategy - x402_settlement_mode: Settlement mode for x402 payments - - Returns: - TextGenerationOutput: Chat completion - - Raises: - OpenGradientError: If request fails - """ - - async def make_request(): - # Security Fix: verify=True enabled - async with x402HttpxClient( - account=self._wallet_account, - base_url=self._og_llm_server_url, - payment_requirements_selector=self._og_payment_selector, - verify=True, - ) as client: - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", - "X-SETTLEMENT-TYPE": x402_settlement_mode, - } - - payload = { - "model": model, - "messages": messages, - "max_tokens": max_tokens, - "temperature": temperature, - } - - if stop_sequence: - payload["stop"] = stop_sequence - - if tools: - payload["tools"] = tools - payload["tool_choice"] = tool_choice or "auto" - - try: - # Non-streaming with x402 - endpoint = "/v1/chat/completions" - response = await client.post(endpoint, json=payload, headers=headers, timeout=60) - - # Read the response content - content = await response.aread() - result = json.loads(content.decode()) - - payment_hash = "" - if X402_PROCESSING_HASH_HEADER in response.headers: - payment_hash = response.headers[X402_PROCESSING_HASH_HEADER] - - choices = result.get("choices") - if not choices: - raise OpenGradientError(f"Invalid response: 'choices' missing or empty in {result}") - - return TextGenerationOutput( - transaction_hash="external", - finish_reason=choices[0].get("finish_reason"), - chat_output=choices[0].get("message"), - payment_hash=payment_hash, - ) - - except Exception as e: - raise OpenGradientError(f"TEE LLM chat request failed: {str(e)}") - - try: - return asyncio.run(make_request()) - except OpenGradientError: - 
raise - except Exception as e: - raise OpenGradientError(f"TEE LLM chat failed: {str(e)}") - - def _tee_llm_chat_stream_sync( - self, - model: str, - messages: List[Dict], - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - tools: Optional[List[Dict]] = None, - tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, - ): - """ - Sync streaming using threading bridge - TRUE real-time streaming. - - Yields StreamChunk objects as they arrive from the background thread. - NO buffering, NO conversion, just direct pass-through. - """ - import threading - from queue import Queue - - queue = Queue() - exception_holder = [] - - def _run_async(): - """Run async streaming in background thread""" - loop = None - try: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - async def _stream(): - try: - async for chunk in self._tee_llm_chat_stream_async( - model=model, - messages=messages, - max_tokens=max_tokens, - stop_sequence=stop_sequence, - temperature=temperature, - tools=tools, - tool_choice=tool_choice, - x402_settlement_mode=x402_settlement_mode, - ): - queue.put(chunk) # Put chunk immediately - except Exception as e: - exception_holder.append(e) - finally: - queue.put(None) # Signal completion - - loop.run_until_complete(_stream()) - except Exception as e: - exception_holder.append(e) - queue.put(None) - finally: - if loop: - try: - pending = asyncio.all_tasks(loop) - for task in pending: - task.cancel() - loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) - # Properly close async generators to avoid RuntimeWarning - loop.run_until_complete(loop.shutdown_asyncgens()) - finally: - loop.close() - - # Start background thread - thread = threading.Thread(target=_run_async, daemon=True) - thread.start() - - # Yield chunks DIRECTLY as they arrive - NO buffering - try: - while True: - chunk = queue.get() # Blocks until chunk available - 
if chunk is None: - break - yield chunk # Yield immediately! - - thread.join(timeout=5) - - if exception_holder: - raise exception_holder[0] - except Exception: - thread.join(timeout=1) - raise - - async def _tee_llm_chat_stream_async( - self, - model: str, - messages: List[Dict], - max_tokens: int = 100, - stop_sequence: Optional[List[str]] = None, - temperature: float = 0.0, - tools: Optional[List[Dict]] = None, - tool_choice: Optional[str] = None, - x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, - ): - """ - Internal async streaming implementation for TEE LLM with x402 payments. - - Yields StreamChunk objects as they arrive from the server. - """ - async with httpx.AsyncClient( - base_url=self._og_llm_streaming_server_url, - headers={"Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}"}, - timeout=TIMEOUT, - limits=LIMITS, - http2=False, - follow_redirects=False, - auth=X402Auth(account=self._wallet_account, network_filter=DEFAULT_NETWORK_FILTER), # type: ignore - verify=True, - ) as client: - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", - "X-SETTLEMENT-TYPE": x402_settlement_mode, - } - - payload = { - "model": model, - "messages": messages, - "max_tokens": max_tokens, - "temperature": temperature, - "stream": True, - } - - if stop_sequence: - payload["stop"] = stop_sequence - if tools: - payload["tools"] = tools - payload["tool_choice"] = tool_choice or "auto" - - async with client.stream( - "POST", - "/v1/chat/completions", - json=payload, - headers=headers, - ) as response: - buffer = b"" - async for chunk in response.aiter_raw(): - if not chunk: - continue - - buffer += chunk - - # Process complete lines from buffer - while b"\n" in buffer: - line_bytes, buffer = buffer.split(b"\n", 1) - - if not line_bytes.strip(): - continue - - try: - line = line_bytes.decode("utf-8").strip() - except UnicodeDecodeError: - continue - - if not line.startswith("data: "): - continue 
- - data_str = line[6:] - if data_str.strip() == "[DONE]": - return - - try: - data = json.loads(data_str) - yield StreamChunk.from_sse_data(data) - except json.JSONDecodeError: - continue - - def list_files(self, model_name: str, version: str) -> List[Dict]: - """ - List files for a specific version of a model. - - Args: - model_name (str): The unique identifier for the model. - version (str): The version identifier for the model. - - Returns: - List[Dict]: A list of dictionaries containing file information. - - Raises: - OpenGradientError: If the file listing fails. - """ - if not self._hub_user: - raise ValueError("User not authenticated") - - url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions/{version}/files" - headers = {"Authorization": f"Bearer {self._hub_user['idToken']}"} - - try: - response = requests.get(url, headers=headers) - response.raise_for_status() - return response.json() - - except requests.RequestException as e: - raise OpenGradientError(f"File listing failed: {str(e)}") - except Exception as e: - raise OpenGradientError(f"Unexpected error during file listing: {str(e)}") - - def _get_abi(self, abi_name) -> str: - """ - Returns the ABI for the requested contract. - """ - abi_path = Path(__file__).parent / "abi" / abi_name - with open(abi_path, "r") as f: - return json.load(f) - - def _get_bin(self, bin_name) -> str: - """ - Returns the bin for the requested contract. - """ - bin_path = Path(__file__).parent / "bin" / bin_name - # Read bytecode with explicit encoding - with open(bin_path, "r", encoding="utf-8") as f: - bytecode = f.read().strip() - if not bytecode.startswith("0x"): - bytecode = "0x" + bytecode - return bytecode - - def _send_tx_with_revert_handling(self, run_function): - """ - Execute a blockchain transaction with revert error. 
- - Args: - run_function: Function that executes the transaction - - Returns: - tx_hash: Transaction hash - tx_receipt: Transaction receipt - - Raises: - Exception: If transaction fails or gas estimation fails - """ - nonce = self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending") - try: - estimated_gas = run_function.estimate_gas({"from": self._wallet_account.address}) - except ContractLogicError as e: - try: - run_function.call({"from": self._wallet_account.address}) - - except ContractLogicError as call_err: - raise ContractLogicError(f"simulation failed with revert reason: {call_err.args[0]}") - - raise ContractLogicError(f"simulation failed with no revert reason. Reason: {e}") - - gas_limit = int(estimated_gas * 3) - - transaction = run_function.build_transaction( - { - "from": self._wallet_account.address, - "nonce": nonce, - "gas": gas_limit, - "gasPrice": self._blockchain.eth.gas_price, - } - ) - - signed_tx = self._wallet_account.sign_transaction(transaction) - tx_hash = self._blockchain.eth.send_raw_transaction(signed_tx.raw_transaction) - tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT) - - if tx_receipt["status"] == 0: - try: - run_function.call({"from": self._wallet_account.address}) - - except ContractLogicError as call_err: - raise ContractLogicError(f"Transaction failed with revert reason: {call_err.args[0]}") - - raise ContractLogicError(f"Transaction failed with no revert reason. Receipt: {tx_receipt}") - - return tx_hash, tx_receipt - - def _get_inference_result_from_node(self, inference_id: str, inference_mode: InferenceMode) -> Dict: - """ - Get the inference result from node. 
- - Args: - inference_id (str): Inference id for a inference request - - Returns: - Dict: The inference result as returned by the node - - Raises: - OpenGradientError: If the request fails or returns an error - """ - try: - encoded_id = urllib.parse.quote(inference_id, safe="") - url = f"{self._api_url}/artela-network/artela-rollkit/inference/tx/{encoded_id}" - - response = requests.get(url) - if response.status_code == 200: - resp = response.json() - inference_result = resp.get("inference_results", {}) - if inference_result: - decoded_bytes = base64.b64decode(inference_result[0]) - decoded_string = decoded_bytes.decode("utf-8") - output = json.loads(decoded_string).get("InferenceResult", {}) - if output is None: - raise OpenGradientError("Missing InferenceResult in inference output") - - match inference_mode: - case InferenceMode.VANILLA: - if "VanillaResult" not in output: - raise OpenGradientError("Missing VanillaResult in inference output") - if "model_output" not in output["VanillaResult"]: - raise OpenGradientError("Missing model_output in VanillaResult") - return {"output": output["VanillaResult"]["model_output"]} - - case InferenceMode.TEE: - if "TeeNodeResult" not in output: - raise OpenGradientError("Missing TeeNodeResult in inference output") - if "Response" not in output["TeeNodeResult"]: - raise OpenGradientError("Missing Response in TeeNodeResult") - if "VanillaResponse" in output["TeeNodeResult"]["Response"]: - if "model_output" not in output["TeeNodeResult"]["Response"]["VanillaResponse"]: - raise OpenGradientError("Missing model_output in VanillaResponse") - return {"output": output["TeeNodeResult"]["Response"]["VanillaResponse"]["model_output"]} - - else: - raise OpenGradientError("Missing VanillaResponse in TeeNodeResult Response") - - case InferenceMode.ZKML: - if "ZkmlResult" not in output: - raise OpenGradientError("Missing ZkmlResult in inference output") - if "model_output" not in output["ZkmlResult"]: - raise OpenGradientError("Missing 
model_output in ZkmlResult") - return {"output": output["ZkmlResult"]["model_output"]} - - case _: - raise OpenGradientError(f"Invalid inference mode: {inference_mode}") - else: - return None - - else: - raise OpenGradientError(f"Failed to get inference result: HTTP {response.status_code}") - - except requests.RequestException as e: - raise OpenGradientError(f"Failed to get inference result: {str(e)}") - except OpenGradientError: - raise - except Exception as e: - raise OpenGradientError(f"Failed to get inference result: {str(e)}") - - -def run_with_retry(txn_function: Callable, max_retries=DEFAULT_MAX_RETRY, retry_delay=DEFAULT_RETRY_DELAY_SEC): - """ - Execute a blockchain transaction with retry logic. - - Args: - txn_function: Function that executes the transaction - max_retries (int): Maximum number of retry attempts - retry_delay (float): Delay in seconds between retries for nonce issues - """ - NONCE_TOO_LOW = "nonce too low" - NONCE_TOO_HIGH = "nonce too high" - INVALID_NONCE = "invalid nonce" - - effective_retries = max_retries if max_retries is not None else DEFAULT_MAX_RETRY - - for attempt in range(effective_retries): - try: - return txn_function() - except Exception as e: - error_msg = str(e).lower() - - nonce_errors = [INVALID_NONCE, NONCE_TOO_LOW, NONCE_TOO_HIGH] - if any(error in error_msg for error in nonce_errors): - if attempt == effective_retries - 1: - raise OpenGradientError(f"Transaction failed after {effective_retries} attempts: {e}") - time.sleep(retry_delay) - continue - - raise diff --git a/src/opengradient/client/__init__.py b/src/opengradient/client/__init__.py new file mode 100644 index 0000000..1d6af8d --- /dev/null +++ b/src/opengradient/client/__init__.py @@ -0,0 +1,3 @@ +from .client import Client + +__all__ = ["Client"] diff --git a/src/opengradient/utils.py b/src/opengradient/client/_conversions.py similarity index 99% rename from src/opengradient/utils.py rename to src/opengradient/client/_conversions.py index 064b7a3..495f663 
100644 --- a/src/opengradient/utils.py +++ b/src/opengradient/client/_conversions.py @@ -8,7 +8,7 @@ import numpy as np from web3.datastructures import AttributeDict -from .types import ModelOutput +from ..types import ModelOutput def convert_to_fixed_point(number: float) -> Tuple[int, int]: diff --git a/src/opengradient/client/_utils.py b/src/opengradient/client/_utils.py new file mode 100644 index 0000000..5e8d1af --- /dev/null +++ b/src/opengradient/client/_utils.py @@ -0,0 +1,65 @@ +import json +import time +from pathlib import Path +from typing import Callable + +from .exceptions import OpenGradientError + +_ABI_DIR = Path(__file__).parent.parent / "abi" +_BIN_DIR = Path(__file__).parent.parent / "bin" + +# How many times we retry a transaction because of nonce conflict +DEFAULT_MAX_RETRY = 5 +DEFAULT_RETRY_DELAY_SEC = 1 + +_NONCE_TOO_LOW = "nonce too low" +_NONCE_TOO_HIGH = "nonce too high" +_INVALID_NONCE = "invalid nonce" +_NONCE_ERRORS = [_INVALID_NONCE, _NONCE_TOO_LOW, _NONCE_TOO_HIGH] + + +def get_abi(abi_name: str) -> dict: + """Returns the ABI for the requested contract.""" + abi_path = _ABI_DIR / abi_name + with open(abi_path, "r") as f: + return json.load(f) + + +def get_bin(bin_name: str) -> str: + """Returns the bytecode for the requested contract.""" + bin_path = _BIN_DIR / bin_name + with open(bin_path, "r", encoding="utf-8") as f: + bytecode = f.read().strip() + if not bytecode.startswith("0x"): + bytecode = "0x" + bytecode + return bytecode + + +def run_with_retry( + txn_function: Callable, + max_retries=DEFAULT_MAX_RETRY, + retry_delay=DEFAULT_RETRY_DELAY_SEC, +): + """ + Execute a blockchain transaction with retry logic. 
+ + Args: + txn_function: Function that executes the transaction + max_retries (int): Maximum number of retry attempts + retry_delay (float): Delay in seconds between retries for nonce issues + """ + effective_retries = max_retries if max_retries is not None else DEFAULT_MAX_RETRY + + for attempt in range(effective_retries): + try: + return txn_function() + except Exception as e: + error_msg = str(e).lower() + + if any(error in error_msg for error in _NONCE_ERRORS): + if attempt == effective_retries - 1: + raise OpenGradientError(f"Transaction failed after {effective_retries} attempts: {e}") + time.sleep(retry_delay) + continue + + raise diff --git a/src/opengradient/client/alpha.py b/src/opengradient/client/alpha.py new file mode 100644 index 0000000..d7e5bfb --- /dev/null +++ b/src/opengradient/client/alpha.py @@ -0,0 +1,247 @@ +""" +Alpha Testnet features for OpenGradient SDK. + +This module contains features that are only available on the Alpha Testnet, +including workflow management and ML model execution. +""" + +from typing import List, Optional + +from eth_account.account import LocalAccount +from web3 import Web3 +from web3.exceptions import ContractLogicError + +from ..defaults import DEFAULT_SCHEDULER_ADDRESS +from ..types import HistoricalInputQuery, ModelOutput, SchedulerParams +from ._conversions import convert_array_to_model_output +from ._utils import get_abi, get_bin, run_with_retry + +# How much time we wait for txn to be included in chain +INFERENCE_TX_TIMEOUT = 120 +REGULAR_TX_TIMEOUT = 30 + + +class Alpha: + """ + Alpha Testnet features namespace. + + This class provides access to features that are only available on the Alpha Testnet, + including workflow deployment and execution. + + Usage: + client = og.Client(...) 
+ result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) + """ + + def __init__(self, blockchain: Web3, wallet_account: LocalAccount): + self._blockchain = blockchain + self._wallet_account = wallet_account + + def new_workflow( + self, + model_cid: str, + input_query: HistoricalInputQuery, + input_tensor_name: str, + scheduler_params: Optional[SchedulerParams] = None, + ) -> str: + """ + Deploy a new workflow contract with the specified parameters. + + This function deploys a new workflow contract on OpenGradient that connects + an AI model with its required input data. When executed, the workflow will fetch + the specified model, evaluate the input query to get data, and perform inference. + + The workflow can be set to execute manually or automatically via a scheduler. + + Args: + model_cid (str): CID of the model to be executed from the Model Hub + input_query (HistoricalInputQuery): Input definition for the model inference, + will be evaluated at runtime for each inference + input_tensor_name (str): Name of the input tensor expected by the model + scheduler_params (Optional[SchedulerParams]): Scheduler configuration for automated execution: + - frequency: Execution frequency in seconds + - duration_hours: How long the schedule should live for + + Returns: + str: Deployed contract address. If scheduler_params was provided, the workflow + will be automatically executed according to the specified schedule. 
+ + Raises: + Exception: If transaction fails or gas estimation fails + """ + # Get contract ABI and bytecode + abi = get_abi("PriceHistoryInference.abi") + bytecode = get_bin("PriceHistoryInference.bin") + + def deploy_transaction(): + contract = self._blockchain.eth.contract(abi=abi, bytecode=bytecode) + query_tuple = input_query.to_abi_format() + constructor_args = [model_cid, input_tensor_name, query_tuple] + + try: + # Estimate gas needed + estimated_gas = contract.constructor(*constructor_args).estimate_gas({"from": self._wallet_account.address}) + gas_limit = int(estimated_gas * 1.2) + except Exception as e: + print(f"Gas estimation failed: {str(e)}") + gas_limit = 5000000 # Conservative fallback + print(f"Using fallback gas limit: {gas_limit}") + + transaction = contract.constructor(*constructor_args).build_transaction( + { + "from": self._wallet_account.address, + "nonce": self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending"), + "gas": gas_limit, + "gasPrice": self._blockchain.eth.gas_price, + "chainId": self._blockchain.eth.chain_id, + } + ) + + signed_txn = self._wallet_account.sign_transaction(transaction) + tx_hash = self._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction) + + tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=60) + + if tx_receipt["status"] == 0: + raise Exception(f"Contract deployment failed, transaction hash: {tx_hash.hex()}") + + return tx_receipt.contractAddress + + contract_address = run_with_retry(deploy_transaction) + + if scheduler_params: + self._register_with_scheduler(contract_address, scheduler_params) + + return contract_address + + def _register_with_scheduler(self, contract_address: str, scheduler_params: SchedulerParams) -> None: + """ + Register the deployed workflow contract with the scheduler for automated execution. 
+ + Args: + contract_address (str): Address of the deployed workflow contract + scheduler_params (SchedulerParams): Scheduler configuration containing: + - frequency: Execution frequency in seconds + - duration_hours: How long to run in hours + - end_time: Unix timestamp when scheduling should end + + Raises: + Exception: If registration with scheduler fails. The workflow contract will + still be deployed and can be executed manually. + """ + scheduler_abi = get_abi("WorkflowScheduler.abi") + + # Scheduler contract address + scheduler_address = DEFAULT_SCHEDULER_ADDRESS + scheduler_contract = self._blockchain.eth.contract(address=scheduler_address, abi=scheduler_abi) + + try: + # Register the workflow with the scheduler + scheduler_tx = scheduler_contract.functions.registerTask( + contract_address, scheduler_params.end_time, scheduler_params.frequency + ).build_transaction( + { + "from": self._wallet_account.address, + "gas": 300000, + "gasPrice": self._blockchain.eth.gas_price, + "nonce": self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending"), + "chainId": self._blockchain.eth.chain_id, + } + ) + + signed_scheduler_tx = self._wallet_account.sign_transaction(scheduler_tx) + scheduler_tx_hash = self._blockchain.eth.send_raw_transaction(signed_scheduler_tx.raw_transaction) + self._blockchain.eth.wait_for_transaction_receipt(scheduler_tx_hash, timeout=REGULAR_TX_TIMEOUT) + except Exception as e: + print(f"Error registering contract with scheduler: {str(e)}") + print(" The workflow contract is still deployed and can be executed manually.") + + def read_workflow_result(self, contract_address: str) -> ModelOutput: + """ + Reads the latest inference result from a deployed workflow contract. 
+ + Args: + contract_address (str): Address of the deployed workflow contract + + Returns: + ModelOutput: The inference result from the contract + + Raises: + ContractLogicError: If the transaction fails + Web3Error: If there are issues with the web3 connection or contract interaction + """ + # Get the contract interface + contract = self._blockchain.eth.contract( + address=Web3.to_checksum_address(contract_address), abi=get_abi("PriceHistoryInference.abi") + ) + + # Get the result + result = contract.functions.getInferenceResult().call() + + return convert_array_to_model_output(result) + + def run_workflow(self, contract_address: str) -> ModelOutput: + """ + Triggers the run() function on a deployed workflow contract and returns the result. + + Args: + contract_address (str): Address of the deployed workflow contract + + Returns: + ModelOutput: The inference result from the contract + + Raises: + ContractLogicError: If the transaction fails + Web3Error: If there are issues with the web3 connection or contract interaction + """ + # Get the contract interface + contract = self._blockchain.eth.contract( + address=Web3.to_checksum_address(contract_address), abi=get_abi("PriceHistoryInference.abi") + ) + + # Call run() function + nonce = self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending") + + run_function = contract.functions.run() + transaction = run_function.build_transaction( + { + "from": self._wallet_account.address, + "nonce": nonce, + "gas": 30000000, + "gasPrice": self._blockchain.eth.gas_price, + "chainId": self._blockchain.eth.chain_id, + } + ) + + signed_txn = self._wallet_account.sign_transaction(transaction) + tx_hash = self._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction) + tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT) + + if tx_receipt.status == 0: + raise ContractLogicError(f"Run transaction failed. 
Receipt: {tx_receipt}") + + # Get the inference result from the contract + result = contract.functions.getInferenceResult().call() + + return convert_array_to_model_output(result) + + def read_workflow_history(self, contract_address: str, num_results: int) -> List[ModelOutput]: + """ + Gets historical inference results from a workflow contract. + + Retrieves the specified number of most recent inference results from the contract's + storage, with the most recent result first. + + Args: + contract_address (str): Address of the deployed workflow contract + num_results (int): Number of historical results to retrieve + + Returns: + List[ModelOutput]: List of historical inference results + """ + contract = self._blockchain.eth.contract( + address=Web3.to_checksum_address(contract_address), abi=get_abi("PriceHistoryInference.abi") + ) + + results = contract.functions.getLastInferenceResults(num_results).call() + return [convert_array_to_model_output(result) for result in results] diff --git a/src/opengradient/client/client.py b/src/opengradient/client/client.py new file mode 100644 index 0000000..1793dfe --- /dev/null +++ b/src/opengradient/client/client.py @@ -0,0 +1,91 @@ +from typing import Optional + +from web3 import Web3 + +from ..defaults import ( + DEFAULT_API_URL, + DEFAULT_INFERENCE_CONTRACT_ADDRESS, + DEFAULT_OPENGRADIENT_LLM_SERVER_URL, + DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, + DEFAULT_RPC_URL, +) +from .llm import LLM +from .model_hub import ModelHub +from .onchain_inference import Inference + + +class Client: + model_hub: ModelHub + llm: LLM + inference: Inference + + def __init__( + self, + private_key: str, + email: Optional[str] = None, + password: Optional[str] = None, + rpc_url: str = DEFAULT_RPC_URL, + api_url: str = DEFAULT_API_URL, + contract_address: str = DEFAULT_INFERENCE_CONTRACT_ADDRESS, + og_llm_server_url: Optional[str] = DEFAULT_OPENGRADIENT_LLM_SERVER_URL, + og_llm_streaming_server_url: Optional[str] = 
DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL, + ): + """ + Initialize the OpenGradient client. + + Args: + private_key: Private key for OpenGradient transactions. + email: Email for Model Hub authentication. Optional. + password: Password for Model Hub authentication. Optional. + rpc_url: RPC URL for the blockchain network. + api_url: API URL for the OpenGradient API. + contract_address: Inference contract address. + og_llm_server_url: OpenGradient LLM server URL. + og_llm_streaming_server_url: OpenGradient LLM streaming server URL. + """ + blockchain = Web3(Web3.HTTPProvider(rpc_url)) + wallet_account = blockchain.eth.account.from_key(private_key) + + hub_user = None + if email is not None: + hub_user = ModelHub._login_to_hub(email, password) + + # Store shared state needed by alpha namespace + self._blockchain = blockchain + self._wallet_account = wallet_account + + # Create namespaces + self.model_hub = ModelHub(hub_user=hub_user) + + self.llm = LLM( + wallet_account=wallet_account, + og_llm_server_url=og_llm_server_url, + og_llm_streaming_server_url=og_llm_streaming_server_url, + ) + + self.inference = Inference( + blockchain=blockchain, + wallet_account=wallet_account, + inference_hub_contract_address=contract_address, + api_url=api_url, + ) + + self._alpha = None # Lazy initialization for alpha namespace + + @property + def alpha(self): + """ + Access Alpha Testnet features. + + Returns: + Alpha: Alpha namespace with workflow and ML model execution methods. + + Example: + client = og.Client(...) 
+ result = client.alpha.new_workflow(model_cid, input_query, input_tensor_name) + """ + if self._alpha is None: + from .alpha import Alpha + + self._alpha = Alpha(self._blockchain, self._wallet_account) + return self._alpha diff --git a/src/opengradient/exceptions.py b/src/opengradient/client/exceptions.py similarity index 100% rename from src/opengradient/exceptions.py rename to src/opengradient/client/exceptions.py diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py new file mode 100644 index 0000000..b039fd0 --- /dev/null +++ b/src/opengradient/client/llm.py @@ -0,0 +1,465 @@ +import asyncio +import json +from typing import Dict, List, Optional, Union + +import httpx +from eth_account.account import LocalAccount +from x402.clients.base import x402Client +from x402.clients.httpx import x402HttpxClient + +from ..defaults import ( + DEFAULT_NETWORK_FILTER, +) +from ..types import ( + TEE_LLM, + StreamChunk, + TextGenerationOutput, + TextGenerationStream, + x402SettlementMode, +) +from .exceptions import OpenGradientError +from .x402_auth import X402Auth + +X402_PROCESSING_HASH_HEADER = "x-processing-hash" +X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + +TIMEOUT = httpx.Timeout( + timeout=90.0, + connect=15.0, + read=15.0, + write=30.0, + pool=10.0, +) +LIMITS = httpx.Limits( + max_keepalive_connections=100, + max_connections=500, + keepalive_expiry=60 * 20, # 20 minutes +) + + +class LLM: + def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_streaming_server_url: str): + self._wallet_account = wallet_account + self._og_llm_server_url = og_llm_server_url + self._og_llm_streaming_server_url = og_llm_streaming_server_url + + def _og_payment_selector(self, accepts, network_filter=DEFAULT_NETWORK_FILTER, scheme_filter=None, max_value=None): + """Custom payment selector for OpenGradient network.""" + return x402Client.default_payment_requirements_selector( + accepts, + 
network_filter=network_filter, + scheme_filter=scheme_filter, + max_value=max_value, + ) + + def completion( + self, + model: TEE_LLM, + prompt: str, + max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + ) -> TextGenerationOutput: + """ + Perform inference on an LLM model using completions via TEE. + + Args: + model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). + prompt (str): The input prompt for the LLM. + max_tokens (int): Maximum number of tokens for LLM output. Default is 100. + stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None. + temperature (float): Temperature for LLM inference, between 0 and 1. Default is 0.0. + x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. + - SETTLE: Records input/output hashes only (most privacy-preserving). + - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). + - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. + Defaults to SETTLE_BATCH. + + Returns: + TextGenerationOutput: Generated text results including: + - Transaction hash ("external" for TEE providers) + - String of completion output + - Payment hash for x402 transactions + + Raises: + OpenGradientError: If the inference fails. 
+ """ + return self._tee_llm_completion( + model=model.split("/")[1], + prompt=prompt, + max_tokens=max_tokens, + stop_sequence=stop_sequence, + temperature=temperature, + x402_settlement_mode=x402_settlement_mode, + ) + + def _tee_llm_completion( + self, + model: str, + prompt: str, + max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + ) -> TextGenerationOutput: + """ + Route completion request to OpenGradient TEE LLM server with x402 payments. + """ + + async def make_request(): + # Security Fix: verify=True enabled + async with x402HttpxClient( + account=self._wallet_account, + base_url=self._og_llm_server_url, + payment_requirements_selector=self._og_payment_selector, + verify=True, + ) as client: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", + "X-SETTLEMENT-TYPE": x402_settlement_mode, + } + + payload = { + "model": model, + "prompt": prompt, + "max_tokens": max_tokens, + "temperature": temperature, + } + + if stop_sequence: + payload["stop"] = stop_sequence + + try: + response = await client.post("/v1/completions", json=payload, headers=headers, timeout=60) + + # Read the response content + content = await response.aread() + result = json.loads(content.decode()) + payment_hash = "" + + if X402_PROCESSING_HASH_HEADER in response.headers: + payment_hash = response.headers[X402_PROCESSING_HASH_HEADER] + + return TextGenerationOutput( + transaction_hash="external", completion_output=result.get("completion"), payment_hash=payment_hash + ) + + except Exception as e: + raise OpenGradientError(f"TEE LLM completion request failed: {str(e)}") + + try: + return asyncio.run(make_request()) + except OpenGradientError: + raise + except Exception as e: + raise OpenGradientError(f"TEE LLM completion failed: {str(e)}") + + def chat( + self, + model: TEE_LLM, + messages: List[Dict], 
+ max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + tools: Optional[List[Dict]] = [], + tool_choice: Optional[str] = None, + x402_settlement_mode: Optional[x402SettlementMode] = x402SettlementMode.SETTLE_BATCH, + stream: bool = False, + ) -> Union[TextGenerationOutput, TextGenerationStream]: + """ + Perform inference on an LLM model using chat via TEE. + + Args: + model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). + messages (List[Dict]): The messages that will be passed into the chat. + max_tokens (int): Maximum number of tokens for LLM output. Default is 100. + stop_sequence (List[str], optional): List of stop sequences for LLM. + temperature (float): Temperature for LLM inference, between 0 and 1. + tools (List[dict], optional): Set of tools for function calling. + tool_choice (str, optional): Sets a specific tool to choose. + x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. + - SETTLE: Records input/output hashes only (most privacy-preserving). + - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). + - SETTLE_METADATA: Records full model info, complete input/output data, and all metadata. + Defaults to SETTLE_BATCH. + stream (bool, optional): Whether to stream the response. Default is False. + + Returns: + Union[TextGenerationOutput, TextGenerationStream]: + - If stream=False: TextGenerationOutput with chat_output, transaction_hash, finish_reason, and payment_hash + - If stream=True: TextGenerationStream yielding StreamChunk objects with typed deltas (true streaming via threading) + + Raises: + OpenGradientError: If the inference fails. 
+ """ + if stream: + # Use threading bridge for true sync streaming + return self._tee_llm_chat_stream_sync( + model=model.split("/")[1], + messages=messages, + max_tokens=max_tokens, + stop_sequence=stop_sequence, + temperature=temperature, + tools=tools, + tool_choice=tool_choice, + x402_settlement_mode=x402_settlement_mode, + ) + else: + # Non-streaming + return self._tee_llm_chat( + model=model.split("/")[1], + messages=messages, + max_tokens=max_tokens, + stop_sequence=stop_sequence, + temperature=temperature, + tools=tools, + tool_choice=tool_choice, + x402_settlement_mode=x402_settlement_mode, + ) + + def _tee_llm_chat( + self, + model: str, + messages: List[Dict], + max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + tools: Optional[List[Dict]] = None, + tool_choice: Optional[str] = None, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + ) -> TextGenerationOutput: + """ + Route chat request to OpenGradient TEE LLM server with x402 payments. 
+ """ + + async def make_request(): + # Security Fix: verify=True enabled + async with x402HttpxClient( + account=self._wallet_account, + base_url=self._og_llm_server_url, + payment_requirements_selector=self._og_payment_selector, + verify=True, + ) as client: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", + "X-SETTLEMENT-TYPE": x402_settlement_mode, + } + + payload = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + } + + if stop_sequence: + payload["stop"] = stop_sequence + + if tools: + payload["tools"] = tools + payload["tool_choice"] = tool_choice or "auto" + + try: + # Non-streaming with x402 + endpoint = "/v1/chat/completions" + response = await client.post(endpoint, json=payload, headers=headers, timeout=60) + + # Read the response content + content = await response.aread() + result = json.loads(content.decode()) + + payment_hash = "" + if X402_PROCESSING_HASH_HEADER in response.headers: + payment_hash = response.headers[X402_PROCESSING_HASH_HEADER] + + choices = result.get("choices") + if not choices: + raise OpenGradientError(f"Invalid response: 'choices' missing or empty in {result}") + + return TextGenerationOutput( + transaction_hash="external", + finish_reason=choices[0].get("finish_reason"), + chat_output=choices[0].get("message"), + payment_hash=payment_hash, + ) + + except Exception as e: + raise OpenGradientError(f"TEE LLM chat request failed: {str(e)}") + + try: + return asyncio.run(make_request()) + except OpenGradientError: + raise + except Exception as e: + raise OpenGradientError(f"TEE LLM chat failed: {str(e)}") + + def _tee_llm_chat_stream_sync( + self, + model: str, + messages: List[Dict], + max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + tools: Optional[List[Dict]] = None, + tool_choice: Optional[str] = None, + x402_settlement_mode: x402SettlementMode = 
x402SettlementMode.SETTLE_BATCH, + ): + """ + Sync streaming using threading bridge - TRUE real-time streaming. + + Yields StreamChunk objects as they arrive from the background thread. + NO buffering, NO conversion, just direct pass-through. + """ + import threading + from queue import Queue + + queue = Queue() + exception_holder = [] + + def _run_async(): + """Run async streaming in background thread""" + loop = None + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + async def _stream(): + try: + async for chunk in self._tee_llm_chat_stream_async( + model=model, + messages=messages, + max_tokens=max_tokens, + stop_sequence=stop_sequence, + temperature=temperature, + tools=tools, + tool_choice=tool_choice, + x402_settlement_mode=x402_settlement_mode, + ): + queue.put(chunk) # Put chunk immediately + except Exception as e: + exception_holder.append(e) + finally: + queue.put(None) # Signal completion + + loop.run_until_complete(_stream()) + except Exception as e: + exception_holder.append(e) + queue.put(None) + finally: + if loop: + try: + pending = asyncio.all_tasks(loop) + for task in pending: + task.cancel() + loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) + # Properly close async generators to avoid RuntimeWarning + loop.run_until_complete(loop.shutdown_asyncgens()) + finally: + loop.close() + + # Start background thread + thread = threading.Thread(target=_run_async, daemon=True) + thread.start() + + # Yield chunks DIRECTLY as they arrive - NO buffering + try: + while True: + chunk = queue.get() # Blocks until chunk available + if chunk is None: + break + yield chunk # Yield immediately! 
+ + thread.join(timeout=5) + + if exception_holder: + raise exception_holder[0] + except Exception: + thread.join(timeout=1) + raise + + async def _tee_llm_chat_stream_async( + self, + model: str, + messages: List[Dict], + max_tokens: int = 100, + stop_sequence: Optional[List[str]] = None, + temperature: float = 0.0, + tools: Optional[List[Dict]] = None, + tool_choice: Optional[str] = None, + x402_settlement_mode: x402SettlementMode = x402SettlementMode.SETTLE_BATCH, + ): + """ + Internal async streaming implementation for TEE LLM with x402 payments. + + Yields StreamChunk objects as they arrive from the server. + """ + async with httpx.AsyncClient( + base_url=self._og_llm_streaming_server_url, + headers={"Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}"}, + timeout=TIMEOUT, + limits=LIMITS, + http2=False, + follow_redirects=False, + auth=X402Auth(account=self._wallet_account, network_filter=DEFAULT_NETWORK_FILTER), # type: ignore + verify=True, + ) as client: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {X402_PLACEHOLDER_API_KEY}", + "X-SETTLEMENT-TYPE": x402_settlement_mode, + } + + payload = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + "stream": True, + } + + if stop_sequence: + payload["stop"] = stop_sequence + if tools: + payload["tools"] = tools + payload["tool_choice"] = tool_choice or "auto" + + async with client.stream( + "POST", + "/v1/chat/completions", + json=payload, + headers=headers, + ) as response: + buffer = b"" + async for chunk in response.aiter_raw(): + if not chunk: + continue + + buffer += chunk + + # Process complete lines from buffer + while b"\n" in buffer: + line_bytes, buffer = buffer.split(b"\n", 1) + + if not line_bytes.strip(): + continue + + try: + line = line_bytes.decode("utf-8").strip() + except UnicodeDecodeError: + continue + + if not line.startswith("data: "): + continue + + data_str = line[6:] + if data_str.strip() == "[DONE]": + 
return + + try: + data = json.loads(data_str) + yield StreamChunk.from_sse_data(data) + except json.JSONDecodeError: + continue diff --git a/src/opengradient/client/model_hub.py b/src/opengradient/client/model_hub.py new file mode 100644 index 0000000..ed3409c --- /dev/null +++ b/src/opengradient/client/model_hub.py @@ -0,0 +1,195 @@ +import os +from typing import Dict, List, Optional + +import firebase # type: ignore[import-untyped] +import requests +from requests_toolbelt import MultipartEncoder # type: ignore[import-untyped] + +from ..types import FileUploadResult, ModelRepository +from .exceptions import OpenGradientError + +# Security Update: Credentials moved to environment variables +_FIREBASE_CONFIG = { + "apiKey": os.getenv("FIREBASE_API_KEY"), + "authDomain": os.getenv("FIREBASE_AUTH_DOMAIN"), + "projectId": os.getenv("FIREBASE_PROJECT_ID"), + "storageBucket": os.getenv("FIREBASE_STORAGE_BUCKET"), + "appId": os.getenv("FIREBASE_APP_ID"), + "databaseURL": os.getenv("FIREBASE_DATABASE_URL", ""), +} + + +class ModelHub: + def __init__(self, hub_user: Optional[Dict] = None): + self._hub_user = hub_user + + @staticmethod + def _login_to_hub(email, password): + if not _FIREBASE_CONFIG.get("apiKey"): + raise ValueError("Firebase API Key is missing in environment variables") + + firebase_app = firebase.initialize_app(_FIREBASE_CONFIG) + return firebase_app.auth().sign_in_with_email_and_password(email, password) + + def create_model(self, model_name: str, model_desc: str, version: str = "1.00") -> ModelRepository: + """ + Create a new model with the given model_name and model_desc, and a specified version. + + Args: + model_name (str): The name of the model. + model_desc (str): The description of the model. + version (str): The version identifier (default is "1.00"). + + Returns: + dict: The server response containing model details. + + Raises: + CreateModelError: If the model creation fails. 
+ """ + if not self._hub_user: + raise ValueError("User not authenticated") + + url = "https://api.opengradient.ai/api/v0/models/" + headers = {"Authorization": f"Bearer {self._hub_user['idToken']}", "Content-Type": "application/json"} + payload = {"name": model_name, "description": model_desc} + + try: + response = requests.post(url, json=payload, headers=headers) + response.raise_for_status() + except requests.HTTPError as e: + error_details = f"HTTP {e.response.status_code}: {e.response.text}" + raise OpenGradientError(f"Model creation failed: {error_details}") from e + + json_response = response.json() + model_name = json_response.get("name") + if not model_name: + raise Exception(f"Model creation response missing 'name'. Full response: {json_response}") + + # Create the specified version for the newly created model + version_response = self.create_version(model_name, version) + + return ModelRepository(model_name, version_response["versionString"]) + + def create_version(self, model_name: str, notes: str = "", is_major: bool = False) -> dict: + """ + Create a new version for the specified model. + + Args: + model_name (str): The unique identifier for the model. + notes (str, optional): Notes for the new version. + is_major (bool, optional): Whether this is a major version update. Defaults to False. + + Returns: + dict: The server response containing version details. + + Raises: + Exception: If the version creation fails. 
+ """ + if not self._hub_user: + raise ValueError("User not authenticated") + + url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions" + headers = {"Authorization": f"Bearer {self._hub_user['idToken']}", "Content-Type": "application/json"} + payload = {"notes": notes, "is_major": is_major} + + try: + response = requests.post(url, json=payload, headers=headers, allow_redirects=False) + response.raise_for_status() + + json_response = response.json() + + if isinstance(json_response, list) and not json_response: + return {"versionString": "Unknown", "note": "Created based on empty response"} + elif isinstance(json_response, dict): + version_string = json_response.get("versionString") + if not version_string: + return {"versionString": "Unknown", "note": "Version ID not provided in response"} + return {"versionString": version_string} + else: + raise Exception(f"Unexpected response type: {type(json_response)}") + + except requests.RequestException as e: + raise Exception(f"Version creation failed: {str(e)}") + except Exception: + raise + + def upload(self, model_path: str, model_name: str, version: str) -> FileUploadResult: + """ + Upload a model file to the server. + + Args: + model_path (str): The path to the model file. + model_name (str): The unique identifier for the model. + version (str): The version identifier for the model. + + Returns: + dict: The processed result. + + Raises: + OpenGradientError: If the upload fails. 
+ """ + + if not self._hub_user: + raise ValueError("User not authenticated") + + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model file not found: {model_path}") + + url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions/{version}/files" + headers = {"Authorization": f"Bearer {self._hub_user['idToken']}"} + + try: + with open(model_path, "rb") as file: + encoder = MultipartEncoder(fields={"file": (os.path.basename(model_path), file, "application/octet-stream")}) + headers["Content-Type"] = encoder.content_type + + response = requests.post(url, data=encoder, headers=headers, timeout=3600) + + if response.status_code == 201: + if response.content and response.content != b"null": + json_response = response.json() + return FileUploadResult(json_response.get("ipfsCid"), json_response.get("size")) + else: + raise RuntimeError("Empty or null response content received") + elif response.status_code == 500: + raise OpenGradientError("Internal server error occurred", status_code=500) + else: + error_message = response.json().get("detail", "Unknown error occurred") + raise OpenGradientError(f"Upload failed: {error_message}", status_code=response.status_code) + + except requests.RequestException as e: + raise OpenGradientError(f"Upload failed: {str(e)}") + except OpenGradientError: + raise + except Exception as e: + raise OpenGradientError(f"Unexpected error during upload: {str(e)}") + + def list_files(self, model_name: str, version: str) -> List[Dict]: + """ + List files for a specific version of a model. + + Args: + model_name (str): The unique identifier for the model. + version (str): The version identifier for the model. + + Returns: + List[Dict]: A list of dictionaries containing file information. + + Raises: + OpenGradientError: If the file listing fails. 
+ """ + if not self._hub_user: + raise ValueError("User not authenticated") + + url = f"https://api.opengradient.ai/api/v0/models/{model_name}/versions/{version}/files" + headers = {"Authorization": f"Bearer {self._hub_user['idToken']}"} + + try: + response = requests.get(url, headers=headers) + response.raise_for_status() + return response.json() + + except requests.RequestException as e: + raise OpenGradientError(f"File listing failed: {str(e)}") + except Exception as e: + raise OpenGradientError(f"Unexpected error during file listing: {str(e)}") diff --git a/src/opengradient/client/onchain_inference.py b/src/opengradient/client/onchain_inference.py new file mode 100644 index 0000000..0a4a7bb --- /dev/null +++ b/src/opengradient/client/onchain_inference.py @@ -0,0 +1,224 @@ +import base64 +import json +import urllib.parse +from typing import Dict, List, Optional, Union + +import numpy as np +import requests +from eth_account.account import LocalAccount +from web3 import Web3 +from web3.exceptions import ContractLogicError +from web3.logs import DISCARD + +from ..types import InferenceMode, InferenceResult +from ._conversions import convert_to_model_input, convert_to_model_output +from ._utils import get_abi, run_with_retry +from .exceptions import OpenGradientError + +# How much time we wait for inference txn to be included in chain +INFERENCE_TX_TIMEOUT = 120 + +PRECOMPILE_CONTRACT_ADDRESS = "0x00000000000000000000000000000000000000F4" + + +class Inference: + def __init__( + self, + blockchain: Web3, + wallet_account: LocalAccount, + inference_hub_contract_address: str, + api_url: str, + ): + self._blockchain = blockchain + self._wallet_account = wallet_account + self._inference_hub_contract_address = inference_hub_contract_address + self._api_url = api_url + self._inference_abi = None + self._precompile_abi = None + + @property + def inference_abi(self) -> dict: + if self._inference_abi is None: + self._inference_abi = get_abi("inference.abi") + return 
self._inference_abi + + @property + def precompile_abi(self) -> dict: + if self._precompile_abi is None: + self._precompile_abi = get_abi("InferencePrecompile.abi") + return self._precompile_abi + + def infer( + self, + model_cid: str, + inference_mode: InferenceMode, + model_input: Dict[str, Union[str, int, float, List, np.ndarray]], + max_retries: Optional[int] = None, + ) -> InferenceResult: + """ + Perform inference on a model. + + Args: + model_cid (str): The unique content identifier for the model from IPFS. + inference_mode (InferenceMode): The inference mode. + model_input (Dict[str, Union[str, int, float, List, np.ndarray]]): The input data for the model. + max_retries (int, optional): Maximum number of retry attempts. Defaults to 5. + + Returns: + InferenceResult (InferenceResult): A dataclass object containing the transaction hash and model output. + transaction_hash (str): Blockchain hash for the transaction + model_output (Dict[str, np.ndarray]): Output of the ONNX model + + Raises: + OpenGradientError: If the inference fails. 
+ """ + + def execute_transaction(): + contract = self._blockchain.eth.contract(address=self._inference_hub_contract_address, abi=self.inference_abi) + precompile_contract = self._blockchain.eth.contract(address=PRECOMPILE_CONTRACT_ADDRESS, abi=self.precompile_abi) + + inference_mode_uint8 = inference_mode.value + converted_model_input = convert_to_model_input(model_input) + + run_function = contract.functions.run(model_cid, inference_mode_uint8, converted_model_input) + + tx_hash, tx_receipt = self._send_tx_with_revert_handling(run_function) + parsed_logs = contract.events.InferenceResult().process_receipt(tx_receipt, errors=DISCARD) + if len(parsed_logs) < 1: + raise OpenGradientError("InferenceResult event not found in transaction logs") + + # TODO: This should return a ModelOutput class object + model_output = convert_to_model_output(parsed_logs[0]["args"]) + if len(model_output) == 0: + # check inference directly from node + parsed_logs = precompile_contract.events.ModelInferenceEvent().process_receipt(tx_receipt, errors=DISCARD) + inference_id = parsed_logs[0]["args"]["inferenceID"] + inference_result = self._get_inference_result_from_node(inference_id, inference_mode) + model_output = convert_to_model_output(inference_result) + + return InferenceResult(tx_hash.hex(), model_output) + + return run_with_retry(execute_transaction, max_retries) + + def _send_tx_with_revert_handling(self, run_function): + """ + Execute a blockchain transaction with revert error. 
+ + Args: + run_function: Function that executes the transaction + + Returns: + tx_hash: Transaction hash + tx_receipt: Transaction receipt + + Raises: + Exception: If transaction fails or gas estimation fails + """ + nonce = self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending") + try: + estimated_gas = run_function.estimate_gas({"from": self._wallet_account.address}) + except ContractLogicError as e: + try: + run_function.call({"from": self._wallet_account.address}) + + except ContractLogicError as call_err: + raise ContractLogicError(f"simulation failed with revert reason: {call_err.args[0]}") + + raise ContractLogicError(f"simulation failed with no revert reason. Reason: {e}") + + gas_limit = int(estimated_gas * 3) + + transaction = run_function.build_transaction( + { + "from": self._wallet_account.address, + "nonce": nonce, + "gas": gas_limit, + "gasPrice": self._blockchain.eth.gas_price, + } + ) + + signed_tx = self._wallet_account.sign_transaction(transaction) + tx_hash = self._blockchain.eth.send_raw_transaction(signed_tx.raw_transaction) + tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT) + + if tx_receipt["status"] == 0: + try: + run_function.call({"from": self._wallet_account.address}) + + except ContractLogicError as call_err: + raise ContractLogicError(f"Transaction failed with revert reason: {call_err.args[0]}") + + raise ContractLogicError(f"Transaction failed with no revert reason. Receipt: {tx_receipt}") + + return tx_hash, tx_receipt + + def _get_inference_result_from_node(self, inference_id: str, inference_mode: InferenceMode) -> Dict: + """ + Get the inference result from the node. 
+ + Args: + inference_id (str): Inference id for an inference request + + Returns: + Dict: The inference result as returned by the node + + Raises: + OpenGradientError: If the request fails or returns an error + """ + try: + encoded_id = urllib.parse.quote(inference_id, safe="") + url = f"{self._api_url}/artela-network/artela-rollkit/inference/tx/{encoded_id}" + + response = requests.get(url) + if response.status_code == 200: + resp = response.json() + inference_result = resp.get("inference_results", {}) + if inference_result: + decoded_bytes = base64.b64decode(inference_result[0]) + decoded_string = decoded_bytes.decode("utf-8") + output = json.loads(decoded_string).get("InferenceResult", {}) + if output is None: + raise OpenGradientError("Missing InferenceResult in inference output") + + match inference_mode: + case InferenceMode.VANILLA: + if "VanillaResult" not in output: + raise OpenGradientError("Missing VanillaResult in inference output") + if "model_output" not in output["VanillaResult"]: + raise OpenGradientError("Missing model_output in VanillaResult") + return {"output": output["VanillaResult"]["model_output"]} + + case InferenceMode.TEE: + if "TeeNodeResult" not in output: + raise OpenGradientError("Missing TeeNodeResult in inference output") + if "Response" not in output["TeeNodeResult"]: + raise OpenGradientError("Missing Response in TeeNodeResult") + if "VanillaResponse" in output["TeeNodeResult"]["Response"]: + if "model_output" not in output["TeeNodeResult"]["Response"]["VanillaResponse"]: + raise OpenGradientError("Missing model_output in VanillaResponse") + return {"output": output["TeeNodeResult"]["Response"]["VanillaResponse"]["model_output"]} + + else: + raise OpenGradientError("Missing VanillaResponse in TeeNodeResult Response") + + case InferenceMode.ZKML: + if "ZkmlResult" not in output: + raise OpenGradientError("Missing ZkmlResult in inference output") + if "model_output" not in output["ZkmlResult"]: + raise OpenGradientError("Missing 
model_output in ZkmlResult") + return {"output": output["ZkmlResult"]["model_output"]} + + case _: + raise OpenGradientError(f"Invalid inference mode: {inference_mode}") + else: + return None + + else: + raise OpenGradientError(f"Failed to get inference result: HTTP {response.status_code}") + + except requests.RequestException as e: + raise OpenGradientError(f"Failed to get inference result: {str(e)}") + except OpenGradientError: + raise + except Exception as e: + raise OpenGradientError(f"Failed to get inference result: {str(e)}") diff --git a/src/opengradient/x402_auth.py b/src/opengradient/client/x402_auth.py similarity index 100% rename from src/opengradient/x402_auth.py rename to src/opengradient/client/x402_auth.py diff --git a/src/opengradient/workflow_models/utils.py b/src/opengradient/workflow_models/utils.py index 85e1309..b6db033 100644 --- a/src/opengradient/workflow_models/utils.py +++ b/src/opengradient/workflow_models/utils.py @@ -2,7 +2,7 @@ from typing import Callable -import opengradient as og +from opengradient.client.alpha import Alpha from .constants import BLOCK_EXPLORER_URL from .types import WorkflowModelOutput @@ -20,15 +20,16 @@ def create_block_explorer_link_transaction(transaction_hash: str) -> str: return block_explorer_url -def read_workflow_wrapper(contract_address: str, format_function: Callable[..., str]) -> WorkflowModelOutput: +def read_workflow_wrapper(alpha: Alpha, contract_address: str, format_function: Callable[..., str]) -> WorkflowModelOutput: """ Wrapper function for reading from models through workflows. Args: + alpha (Alpha): The alpha namespace from an initialized OpenGradient client (client.alpha). 
contract_address (str): Smart contract address of the workflow format_function (Callable): Function for formatting the result returned by read_workflow """ try: - result = og.alpha.read_workflow_result(contract_address) + result = alpha.read_workflow_result(contract_address) formatted_result = format_function(result) block_explorer_link = create_block_explorer_link_smart_contract(contract_address) diff --git a/src/opengradient/workflow_models/workflow_models.py b/src/opengradient/workflow_models/workflow_models.py index 328ddce..cb8b3a3 100644 --- a/src/opengradient/workflow_models/workflow_models.py +++ b/src/opengradient/workflow_models/workflow_models.py @@ -1,5 +1,7 @@ """Repository of OpenGradient quantitative workflow models.""" +from opengradient.client.alpha import Alpha + from .constants import ( BTC_1_HOUR_PRICE_FORECAST_ADDRESS, ETH_1_HOUR_PRICE_FORECAST_ADDRESS, @@ -13,84 +15,90 @@ from .utils import read_workflow_wrapper -def read_eth_usdt_one_hour_volatility_forecast() -> WorkflowModelOutput: +def read_eth_usdt_one_hour_volatility_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the ETH/USDT one hour volatility forecast model workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-1hr-volatility-ethusdt. """ return read_workflow_wrapper( - contract_address=ETH_USDT_1_HOUR_VOLATILITY_ADDRESS, format_function=lambda x: format(float(x.numbers["Y"].item()), ".10%") + alpha, contract_address=ETH_USDT_1_HOUR_VOLATILITY_ADDRESS, format_function=lambda x: format(float(x.numbers["Y"].item()), ".10%") ) -def read_btc_1_hour_price_forecast() -> WorkflowModelOutput: +def read_btc_1_hour_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the BTC one hour return forecast workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-btc-1hr-forecast. 
""" return read_workflow_wrapper( + alpha, contract_address=BTC_1_HOUR_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["regression_output"].item()), ".10%"), ) -def read_eth_1_hour_price_forecast() -> WorkflowModelOutput: +def read_eth_1_hour_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the ETH one hour return forecast workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-eth-1hr-forecast. """ return read_workflow_wrapper( + alpha, contract_address=ETH_1_HOUR_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["regression_output"].item()), ".10%"), ) -def read_sol_1_hour_price_forecast() -> WorkflowModelOutput: +def read_sol_1_hour_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the SOL one hour return forecast workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-sol-1hr-forecast. """ return read_workflow_wrapper( + alpha, contract_address=SOL_1_HOUR_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["regression_output"].item()), ".10%"), ) -def read_sui_1_hour_price_forecast() -> WorkflowModelOutput: +def read_sui_1_hour_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the SUI one hour return forecast workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-sui-1hr-forecast. """ return read_workflow_wrapper( + alpha, contract_address=SUI_1_HOUR_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["regression_output"].item()), ".10%"), ) -def read_sui_usdt_30_min_price_forecast() -> WorkflowModelOutput: +def read_sui_usdt_30_min_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the SUI/USDT pair 30 min return forecast workflow on the OpenGradient network. 
More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-30min-return-suiusdt. """ return read_workflow_wrapper( + alpha, contract_address=SUI_30_MINUTE_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["destandardized_prediction"].item()), ".10%"), ) -def read_sui_usdt_6_hour_price_forecast() -> WorkflowModelOutput: +def read_sui_usdt_6_hour_price_forecast(alpha: Alpha) -> WorkflowModelOutput: """ Read from the SUI/USDT pair 6 hour return forecast workflow on the OpenGradient network. More information on this model can be found at https://hub.opengradient.ai/models/OpenGradient/og-6h-return-suiusdt. """ return read_workflow_wrapper( + alpha, contract_address=SUI_6_HOUR_PRICE_FORECAST_ADDRESS, format_function=lambda x: format(float(x.numbers["destandardized_prediction"].item()), ".10%"), ) diff --git a/stresstest/infer.py b/stresstest/infer.py index 2edbbb4..e4d3ca2 100644 --- a/stresstest/infer.py +++ b/stresstest/infer.py @@ -10,13 +10,11 @@ MODEL = "QmbUqS93oc4JTLMHwpVxsE39mhNxy6hpf6Py3r9oANr8aZ" -def run_inference(input_data: dict): - og.infer(MODEL, og.InferenceMode.VANILLA, input_data) - - def main(private_key: str): - # init with private key only - og.init(private_key=private_key, email=None, password=None) + client = og.Client(private_key=private_key) + + def run_inference(input_data: dict): + client.inference.infer(MODEL, og.InferenceMode.VANILLA, input_data) latencies, failures = stress_test_wrapper(run_inference, num_requests=NUM_REQUESTS) diff --git a/stresstest/llm.py b/stresstest/llm.py index 62f51fe..02f08e7 100644 --- a/stresstest/llm.py +++ b/stresstest/llm.py @@ -10,13 +10,11 @@ MODEL = "anthropic/claude-3.5-haiku" -def run_prompt(prompt: str): - og.llm_completion(MODEL, prompt, max_tokens=50) - - def main(private_key: str): - # init with private key only - og.init(private_key=private_key, email=None, password=None) + client = og.Client(private_key=private_key) + + def 
run_prompt(prompt: str): + client.llm.completion(MODEL, prompt, max_tokens=50) latencies, failures = stress_test_wrapper(run_prompt, num_requests=NUM_REQUESTS, is_llm=True) diff --git a/templates/text.mako b/templates/text.mako index 7307ee5..03e2326 100644 --- a/templates/text.mako +++ b/templates/text.mako @@ -34,6 +34,8 @@ outline: [2,3] # dobj.module is None so pull module name from qualname parts = dobj.qualname.split('.') + if len(parts) < 2: + return '`{}`'.format(parts[0]) app = parts[0] module = parts[1] if len(parts) > 2: diff --git a/tests/client_test.py b/tests/client_test.py index 8b6e12a..ad0015a 100644 --- a/tests/client_test.py +++ b/tests/client_test.py @@ -8,22 +8,20 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..")) from src.opengradient.client import Client -from src.opengradient.exceptions import OpenGradientError from src.opengradient.types import ( - StreamChunk, TEE_LLM, + StreamChunk, TextGenerationOutput, x402SettlementMode, ) - # --- Fixtures --- @pytest.fixture def mock_web3(): """Create a mock Web3 instance.""" - with patch("src.opengradient.client.Web3") as mock: + with patch("src.opengradient.client.client.Web3") as mock: mock_instance = MagicMock() mock.return_value = mock_instance mock.HTTPProvider.return_value = MagicMock() @@ -77,13 +75,14 @@ def test_client_initialization_without_auth(self, mock_web3, mock_abi_files): contract_address="0x" + "b" * 40, ) - assert client._hub_user is None - assert client._api_url == "https://test.api.url" - assert client._inference_hub_contract_address == "0x" + "b" * 40 + assert client.model_hub._hub_user is None def test_client_initialization_with_auth(self, mock_web3, mock_abi_files): """Test client initialization with email/password authentication.""" - with patch("src.opengradient.client.firebase") as mock_firebase: + with ( + patch("src.opengradient.client.model_hub._FIREBASE_CONFIG", {"apiKey": "fake"}), + patch("src.opengradient.client.model_hub.firebase") as mock_firebase, + 
): mock_auth = MagicMock() mock_auth.sign_in_with_email_and_password.return_value = { "idToken": "test_token", @@ -100,8 +99,8 @@ def test_client_initialization_with_auth(self, mock_web3, mock_abi_files): password="test_password", ) - assert client._hub_user is not None - assert client._hub_user["idToken"] == "test_token" + assert client.model_hub._hub_user is not None + assert client.model_hub._hub_user["idToken"] == "test_token" def test_client_initialization_custom_llm_urls(self, mock_web3, mock_abi_files): """Test client initialization with custom LLM server URLs.""" @@ -117,8 +116,8 @@ def test_client_initialization_custom_llm_urls(self, mock_web3, mock_abi_files): og_llm_streaming_server_url=custom_streaming_url, ) - assert client._og_llm_server_url == custom_llm_url - assert client._og_llm_streaming_server_url == custom_streaming_url + assert client.llm._og_llm_server_url == custom_llm_url + assert client.llm._og_llm_streaming_server_url == custom_streaming_url class TestAlphaProperty: @@ -126,7 +125,7 @@ def test_alpha_lazy_initialization(self, client): """Test that alpha property is lazily initialized.""" assert client._alpha is None - with patch("src.opengradient.alpha.Alpha") as mock_alpha: + with patch("src.opengradient.client.alpha.Alpha") as mock_alpha: mock_alpha_instance = MagicMock() mock_alpha.return_value = mock_alpha_instance @@ -137,7 +136,7 @@ def test_alpha_lazy_initialization(self, client): def test_alpha_returns_same_instance(self, client): """Test that alpha property returns the same instance on subsequent calls.""" - with patch("src.opengradient.alpha.Alpha") as mock_alpha: + with patch("src.opengradient.client.alpha.Alpha") as mock_alpha: mock_alpha_instance = MagicMock() mock_alpha.return_value = mock_alpha_instance @@ -154,7 +153,10 @@ def test_alpha_returns_same_instance(self, client): class TestAuthentication: def test_login_to_hub_success(self, mock_web3, mock_abi_files): """Test successful login to hub.""" - with 
patch("src.opengradient.client.firebase") as mock_firebase: + with ( + patch("src.opengradient.client.model_hub._FIREBASE_CONFIG", {"apiKey": "fake"}), + patch("src.opengradient.client.model_hub.firebase") as mock_firebase, + ): mock_auth = MagicMock() mock_auth.sign_in_with_email_and_password.return_value = { "idToken": "success_token", @@ -172,11 +174,14 @@ def test_login_to_hub_success(self, mock_web3, mock_abi_files): ) mock_auth.sign_in_with_email_and_password.assert_called_once_with("user@test.com", "password123") - assert client._hub_user["idToken"] == "success_token" + assert client.model_hub._hub_user["idToken"] == "success_token" def test_login_to_hub_failure(self, mock_web3, mock_abi_files): """Test login failure raises exception.""" - with patch("src.opengradient.client.firebase") as mock_firebase: + with ( + patch("src.opengradient.client.model_hub._FIREBASE_CONFIG", {"apiKey": "fake"}), + patch("src.opengradient.client.model_hub.firebase") as mock_firebase, + ): mock_auth = MagicMock() mock_auth.sign_in_with_email_and_password.side_effect = Exception("Invalid credentials") mock_firebase.initialize_app.return_value.auth.return_value = mock_auth @@ -198,14 +203,14 @@ def test_login_to_hub_failure(self, mock_web3, mock_abi_files): class TestLLMCompletion: def test_llm_completion_success(self, client): """Test successful LLM completion.""" - with patch.object(client, "_tee_llm_completion") as mock_tee: + with patch.object(client.llm, "_tee_llm_completion") as mock_tee: mock_tee.return_value = TextGenerationOutput( transaction_hash="external", completion_output="Hello! 
How can I help?", payment_hash="0xpayment123", ) - result = client.llm_completion( + result = client.llm.completion( model=TEE_LLM.GPT_4O, prompt="Hello", max_tokens=100, @@ -218,7 +223,7 @@ def test_llm_completion_success(self, client): class TestLLMChat: def test_llm_chat_success_non_streaming(self, client): """Test successful non-streaming LLM chat.""" - with patch.object(client, "_tee_llm_chat") as mock_tee: + with patch.object(client.llm, "_tee_llm_chat") as mock_tee: mock_tee.return_value = TextGenerationOutput( transaction_hash="external", chat_output={"role": "assistant", "content": "Hi there!"}, @@ -226,7 +231,7 @@ def test_llm_chat_success_non_streaming(self, client): payment_hash="0xpayment", ) - result = client.llm_chat( + result = client.llm.chat( model=TEE_LLM.GPT_4O, messages=[{"role": "user", "content": "Hello"}], stream=False, @@ -237,14 +242,14 @@ def test_llm_chat_success_non_streaming(self, client): def test_llm_chat_streaming(self, client): """Test streaming LLM chat.""" - with patch.object(client, "_tee_llm_chat_stream_sync") as mock_stream: + with patch.object(client.llm, "_tee_llm_chat_stream_sync") as mock_stream: mock_chunks = [ StreamChunk(choices=[], model="gpt-4o"), StreamChunk(choices=[], model="gpt-4o", is_final=True), ] mock_stream.return_value = iter(mock_chunks) - result = client.llm_chat( + result = client.llm.chat( model=TEE_LLM.GPT_4O, messages=[{"role": "user", "content": "Hello"}], stream=True, diff --git a/tests/utils_test.py b/tests/utils_test.py index b5eb110..082cb8d 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -1,16 +1,10 @@ import json -import os -import sys import numpy as np import pytest -# Add the src directory to the Python path -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) - -# Import from src/opengradient -import src.opengradient.types as types -import src.opengradient.utils as utils +import opengradient.client._conversions as utils +import opengradient.types as types 
@pytest.fixture