diff --git a/.gitignore b/.gitignore index a9729050..a867929d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ .DS_Store .venv/ -venv/* +venv/ *venv/* *.venv/* diff --git a/README.md b/README.md index 27b82ca8..2ce854cc 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,55 @@ run = run.score([check_keywords], expected_keywords="expected_keywords") --- +## Saving Completion Feedback + +Track user feedback on prompt completions to improve your prompts with DSPy optimization: + +```python +import zeroeval as ze + +# Initialize client +ze.init() + +# Send positive feedback +feedback = ze.send_feedback( + prompt_slug="customer-support", + completion_id="completion-uuid-123", + thumbs_up=True, + reason="Excellent response, very helpful" +) + +# Send negative feedback with expected output +feedback = ze.send_feedback( + prompt_slug="customer-support", + completion_id="completion-uuid-456", + thumbs_up=False, + reason="Response was too formal", + expected_output="Should be more casual and friendly", + metadata={"user_id": "user-789", "source": "production"} +) +``` + +### Parameters + +- **prompt_slug** _(str, required)_ – The slug of the prompt +- **completion_id** _(str, required)_ – UUID of the completion to provide feedback on +- **thumbs_up** _(bool, required)_ – True for positive feedback, False for negative +- **reason** _(str, optional)_ – Explanation of the feedback +- **expected_output** _(str, optional)_ – Description of what the expected output should be. This field is automatically used by ZeroEval for **tuning datasets and DSPy prompt optimization** to create stronger training examples. +- **metadata** _(dict, optional)_ – Additional metadata to attach to the feedback + +### Integration with Prompt Tuning + +Feedback submitted via `send_feedback` is automatically linked to the prompt version used for the completion. When you provide both `reason` and `expected_output`, ZeroEval creates stronger training examples for DSPy optimization: + +- **`reason`** helps the optimizer understand what makes a response good or bad +- **`expected_output`** provides a concrete example of the ideal response, which DSPy uses to generate improved prompts + +If the completion was traced with a `span_id`, the feedback is mirrored to your tuning datasets automatically, making it available for prompt optimization runs in the ZeroEval platform. + +--- + ## Streaming & tracing • **Streaming responses** – streaming guide: https://docs.zeroeval.com/streaming (coming soon) diff --git a/examples_v2/README.md b/examples_v2/README.md index 4d0b745a..c9dc71bc 100644 --- a/examples_v2/README.md +++ b/examples_v2/README.md @@ -14,6 +14,10 @@ This directory contains organized, focused examples for ZeroEval SDK features. - Weighted variant selection - Automatic choice tracking +- **`tuning/`** - Examples for Prompt Tuning and Optimization + - Customer support agent with feedback loop + - Prompt versioning with ze.prompt() + ## Getting Started 1. **Install dependencies**: diff --git a/examples_v2/tuning/README.md b/examples_v2/tuning/README.md new file mode 100644 index 00000000..cf7427a0 --- /dev/null +++ b/examples_v2/tuning/README.md @@ -0,0 +1,60 @@ +# Prompt Tuning Examples + +This directory contains examples demonstrating ZeroEval's prompt tuning and optimization features. + +## Core Concepts + +Prompt tuning in ZeroEval works through a feedback loop: + +1. **Define Prompt**: Use `ze.prompt()` to register a prompt and bind variables. +2. 
**Trace Execution**: Run your agent; the SDK automatically traces the inputs and outputs. +3. **Send Feedback**: Use `ze.send_feedback()` (or the direct API) to signal what was good or bad about the completion. +4. **Optimize**: ZeroEval (and integrated optimizers like DSPy) uses this feedback to generate better prompt versions. + +## Examples + +### 1. Customer Support Agent (`customer_support_agent.py`) + +A simple example of a support agent that uses `ze.prompt()` for versioned, managed prompts. This demonstrates the basic setup without the automated feedback loop. + +### 2. Customer Support Agent with SDK Feedback (`bookstore_agent_with_feedback.py`) + +An advanced example that implements a complete automated feedback loop using the ZeroEval SDK. + +**Key Features:** + +- **Automated Evaluator**: Uses a powerful model (GPT-4o) to grade the agent's responses. +- **Feedback Submission**: Uses `ze.send_feedback()` to programmatically submit the evaluator's scores (thumbs up/down) and reasoning. +- **Metadata Tracking**: Attaches metadata (like scores and evaluator model) to the feedback. + +**Run it:** + +```bash +python tuning/bookstore_agent_with_feedback.py +``` + +### 3. Customer Support Agent with API Feedback (`bookstore_agent_with_api_feedback.py`) + +Demonstrates how to submit feedback using direct HTTP calls to the ZeroEval API, bypassing the SDK's `ze.send_feedback` helper. This is useful for frontend applications or systems where the SDK cannot be installed. + +**Key Features:** + +- **Direct API Integration**: Uses `requests` to hit the `/v1/prompts/{slug}/completions/{id}/feedback` endpoint. +- **Payload Structure**: Shows exactly what JSON payload the backend expects. +- **Flexible Integration**: Ideal for custom pipelines or non-Python environments. + +**Run it:** + +```bash +python tuning/bookstore_agent_with_api_feedback.py +``` + +## Setup + +Ensure you have your `.env` file set up in the parent directory with: + +- `ZEROEVAL_API_KEY`: Your ZeroEval API key (required, starts with `sk_ze_...`) +- `OPENAI_API_KEY`: Your OpenAI API key (required) +- `ZEROEVAL_API_URL`: (Optional) URL of your ZeroEval instance (default: `http://localhost:8000`) + +**Important**: All examples now pull credentials from environment variables. Never commit hardcoded API keys to version control. diff --git a/examples_v2/tuning/bookstore_agent.py b/examples_v2/tuning/bookstore_agent.py new file mode 100644 index 00000000..127b3af4 --- /dev/null +++ b/examples_v2/tuning/bookstore_agent.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +Customer Support Agent with Tuning +================================= + +This example demonstrates how to build a customer support agent using ZeroEval's +tuning features. It uses `ze.prompt()` to manage the prompt and `ze.send_feedback()` +to provide signals for optimization. + +Key concepts: +1. `ze.prompt()`: Defines the prompt and binds variables for interpolation +2. Automatic Tracing: The SDK automatically traces OpenAI calls +3. Interactive Mode: You can chat with the agent and see how it responds +""" + +import os +import uuid +from pathlib import Path + +from dotenv import load_dotenv + +# Load environment variables BEFORE importing zeroeval +env_path = Path(__file__).parent.parent / ".env" +load_dotenv(env_path) + +import openai +import zeroeval as ze + +# 1. 
Initialize ZeroEval +# Ensure you have ZEROEVAL_API_KEY and ZEROEVAL_API_URL set in your environment +ze.init( + api_key=os.getenv("ZEROEVAL_API_KEY"), + api_url=os.getenv("ZEROEVAL_API_URL", "http://localhost:8000"), +) + +def customer_support_agent(user_query: str, user_context: dict = None, conversation_history: list = None): + """ + A simple customer support agent that uses a managed prompt and maintains conversation history. + """ + if user_context is None: + user_context = {} + if conversation_history is None: + conversation_history = [] + + # 2. Define the prompt using ze.prompt() + # This registers the prompt with ZeroEval (if not exists) and allows for versioning. + # The 'content' is your base prompt. You can use {{variable}} syntax. + # 'variables' are passed for interpolation and tracking. + + prompt_name = "bookstore-support-agent" + + system_instruction = ze.prompt( + name=prompt_name, + content="""You are Elena, a passionate book enthusiast and customer support specialist at Bibliophile Books. You've worked in the bookstore for 5 years and genuinely love helping people discover their next great read. + +Your personality: +- Warm and personable, like chatting with a knowledgeable friend at a bookshop +- Enthusiastic about books and reading +- Patient and empathetic when customers have issues +- Professional but not overly formal +- You use the customer's name naturally in conversation + +Customer Information: +- Name: {{user_name}} +- Membership Level: {{membership}} + +Guidelines: +1. Address {{user_name}} directly and warmly (but don't say "Hi {{user_name}}" in every message if you're in an ongoing conversation) +2. For Gold members: Remember they have free shipping, priority support, and 15% off all purchases +3. For Standard members: Offer helpful service while mentioning Gold membership benefits when relevant +4. Keep responses concise but friendly (2-4 sentences for simple queries) +5. If you don't know something or can't help, offer to connect them with a specialist +6. Never use placeholder text like "[Your Name]" - you are Elena +7. End naturally without formal sign-offs unless it's clearly the end of the conversation +8. IMPORTANT: Remember information from the conversation history and don't ask for things the customer already told you + +Respond directly to their query in a helpful, personable way.""", + variables={ + "user_name": user_context.get("name", "there"), + "membership": user_context.get("membership", "Standard") + } + ) + + # Initialize OpenAI client (ZeroEval automatically instruments this) + client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + + print(f"\n--- Sending Request to AI ({prompt_name}) ---") + + # Build messages with conversation history + messages = [{"role": "system", "content": system_instruction}] + messages.extend(conversation_history) + messages.append({"role": "user", "content": user_query}) + + # 3. 
Call the Model + # The SDK intercepts this call: + # - Detects the metadata from ze.prompt() + # - Interpolates variables into the content + # - Traces the execution + response = client.chat.completions.create( + model="gpt-4o-mini", # Use a cost-effective model + messages=messages, + temperature=0.7 + ) + + completion_text = response.choices[0].message.content + completion_id = response.id + + return completion_text, completion_id, prompt_name + +def main(): + # Example interaction + print("\n=== Bookstore Support Agent (Type 'exit' to quit) ===") + + # We'll assume a fixed user context for this session + user_context = { + "name": "Alice", + "membership": "Gold" # VIP customer + } + print(f"Context: User={user_context['name']}, Membership={user_context['membership']}\n") + + # Initialize conversation history + conversation_history = [] + + # Agent introduces itself + intro_query = "Hello! Please introduce yourself and ask how you can help me today." + response_text, _, _ = customer_support_agent(intro_query, user_context, conversation_history) + print(f"Elena: {response_text}\n") + + # Add intro to history + conversation_history.append({"role": "user", "content": intro_query}) + conversation_history.append({"role": "assistant", "content": response_text}) + + while True: + try: + user_query = input("\nEnter your query: ").strip() + if not user_query: + continue + + if user_query.lower() in ('exit', 'quit'): + print("Goodbye!") + break + + response_text, completion_id, prompt_slug = customer_support_agent(user_query, user_context, conversation_history) + + print(f"\nElena: {response_text}") + + # Add to conversation history + conversation_history.append({"role": "user", "content": user_query}) + conversation_history.append({"role": "assistant", "content": response_text}) + + except KeyboardInterrupt: + print("\nGoodbye!") + break + except Exception as e: + print(f"\nError: {e}") + print("Check your ZEROEVAL_API_KEY and OPENAI_API_KEY.") + break + +if __name__ == "__main__": + main() diff --git a/examples_v2/tuning/bookstore_agent_with_api_feedback.py b/examples_v2/tuning/bookstore_agent_with_api_feedback.py new file mode 100644 index 00000000..c73d3878 --- /dev/null +++ b/examples_v2/tuning/bookstore_agent_with_api_feedback.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +""" +Customer Support Agent with API Feedback Loop +=================================================== + +This example demonstrates how to submit feedback using the ZeroEval API directly, +bypassing the SDK's `ze.send_feedback` helper. This is useful for: +1. Frontend applications calling the backend directly +2. Systems where the SDK is not installed +3. Custom integrations + +Key concepts: +- `POST /v1/prompts/{slug}/completions/{id}/feedback`: The feedback endpoint +- Direct API interaction +""" + +import os +import json +import requests +from pathlib import Path +from dotenv import load_dotenv + +# Load environment variables BEFORE importing zeroeval +env_path = Path(__file__).parent.parent / ".env" +load_dotenv(env_path) + +import openai +import zeroeval as ze + +# Configuration +API_URL = os.getenv("ZEROEVAL_API_URL", "http://localhost:8000") +API_KEY = os.getenv("ZEROEVAL_API_KEY") # Use your ZeroEval API Key + +# 1. 
Initialize ZeroEval +ze.init( + api_key=API_KEY, + api_url=API_URL, +) + +# Initialize OpenAI client +client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +def customer_support_agent(user_query: str, user_context: dict = None, conversation_history: list = None): + """ + A simple customer support agent that uses a managed prompt and maintains conversation history. + """ + if user_context is None: + user_context = {} + if conversation_history is None: + conversation_history = [] + + # 2. Define the prompt using ze.prompt() + prompt_name = "bookstore-support-agent-with-api-feedback" + + system_instruction = ze.prompt( + name=prompt_name, + content="""You are Elena, a passionate book enthusiast and customer support specialist at Bibliophile Books. You've worked in the bookstore for 5 years and genuinely love helping people discover their next great read. + +Your personality: +- Warm and personable, like chatting with a knowledgeable friend at a bookshop +- Enthusiastic about books and reading +- Patient and empathetic when customers have issues +- Professional but not overly formal +- You use the customer's name naturally in conversation + +Customer Information: +- Name: {{user_name}} +- Membership Level: {{membership}} + +Guidelines: +- Address {{user_name}} directly and warmly +- For Gold members: Remember they have free shipping, priority support, and 15% off all purchases +- For Standard members: Offer helpful service while mentioning Gold membership benefits when relevant +- Keep responses concise but friendly +- If you don't know something or can't help, offer to connect them with a specialist +- Never use placeholder text like "[Your Name]" - you are Elena + +Respond directly to their query in a helpful, personable way.""", + variables={ + "user_name": user_context.get("name", "there"), + "membership": user_context.get("membership", "Standard") + } + ) + + print(f"\n--- Sending Request to AI ({prompt_name}) ---") + + # Build messages with conversation history + messages = [{"role": "system", "content": system_instruction}] + messages.extend(conversation_history) + messages.append({"role": "user", "content": user_query}) + + # 3. Call the Model + # The SDK intercepts this call and tracks the completion_id + response = client.chat.completions.create( + model="gpt-4o-mini", # Use a cost-effective model for the agent + messages=messages, + temperature=0.7 + ) + + completion_text = response.choices[0].message.content + completion_id = response.id + + return completion_text, completion_id, prompt_name + +def evaluate_response(user_query: str, agent_response: str): + """ + Uses a powerful model (Evaluator) to grade the agent's response. + Returns (is_good: bool, reason: str) + """ + print("\n--- Running Evaluator (GPT-4o) ---") + + eval_prompt = f"""You are an expert customer support quality assurance specialist. + Your job is to evaluate a customer support response. + + User Query: "{user_query}" + Agent Response: "{agent_response}" + + Criteria: + 1. Is the tone warm and professional? + 2. Is the information accurate and helpful? + 3. Does it address the user's specific query? 
+ + Output strictly in JSON format with these fields: + - "score": 1 to 5 (5 being perfect) + - "reason": A brief explanation of the score + - "thumbs_up": true if score >= 4, else false + """ + + response = client.chat.completions.create( + model="gpt-4o", # Use a powerful model for evaluation + messages=[{"role": "user", "content": eval_prompt}], + temperature=0, + response_format={"type": "json_object"} + ) + + try: + result = json.loads(response.choices[0].message.content) + return result + except Exception as e: + print(f"Error parsing evaluation: {e}") + return {"thumbs_up": True, "reason": "Failed to parse evaluation", "score": 5} + +def send_feedback_via_api(prompt_slug, completion_id, thumbs_up, reason=None, expected_output=None, metadata=None): + """ + Sends feedback directly using requests.post to the ZeroEval API. + """ + url = f"{API_URL}/v1/prompts/{prompt_slug}/completions/{completion_id}/feedback" + + payload = { + "thumbs_up": thumbs_up, + "reason": reason, + "expected_output": expected_output, + "metadata": metadata or {} + } + + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + } + + try: + print(f"\n[API] POST {url}") + resp = requests.post(url, json=payload, headers=headers) + resp.raise_for_status() + print("✓ API Feedback submitted successfully") + return resp.json() + except requests.exceptions.HTTPError as e: + print(f"❌ API Request failed: {e}") + print(f"Response: {e.response.text}") + return None + except Exception as e: + print(f"❌ Error sending feedback: {e}") + return None + +def main(): + # Example interaction + print("\n=== Bookstore Support Agent with API Feedback (Type 'exit' to quit) ===") + + user_context = { + "name": "Alice", + "membership": "Gold" # VIP customer + } + print(f"Context: User={user_context['name']}, Membership={user_context['membership']}\n") + + conversation_history = [] + + while True: + try: + user_query = input("\nEnter your query: ").strip() + if not user_query: + continue + + if user_query.lower() in ('exit', 'quit'): + print("Goodbye!") + break + + # 1. Get response from the agent + response_text, completion_id, prompt_slug = customer_support_agent( + user_query, + user_context, + conversation_history + ) + + print(f"\nElena: {response_text}") + print(f"\n[DEBUG] OpenAI completion_id: {completion_id}") + print(f"[DEBUG] Prompt slug: {prompt_slug}") + + # 2. Generate feedback using a powerful model + # In a real system, this might happen asynchronously or be sampled + eval_result = evaluate_response(user_query, response_text) + + print(f"\n[Evaluator] Score: {eval_result.get('score')}/5") + print(f"[Evaluator] Reason: {eval_result.get('reason')}") + print(f"[Evaluator] Verdict: {'👍 Thumbs Up' if eval_result.get('thumbs_up') else '👎 Thumbs Down'}") + + # 3. 
Submit feedback via API directly + send_feedback_via_api( + prompt_slug=prompt_slug, + completion_id=completion_id, + thumbs_up=eval_result.get("thumbs_up", True), + reason=eval_result.get("reason"), + metadata={ + "score": eval_result.get("score"), + "evaluator_model": "gpt-4o", + "source": "direct_api" + } + ) + + # Add to conversation history + conversation_history.append({"role": "user", "content": user_query}) + conversation_history.append({"role": "assistant", "content": response_text}) + + except KeyboardInterrupt: + print("\nGoodbye!") + break + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() + break + +if __name__ == "__main__": + main() + diff --git a/examples_v2/tuning/bookstore_agent_with_feedback.py b/examples_v2/tuning/bookstore_agent_with_feedback.py new file mode 100644 index 00000000..8452650b --- /dev/null +++ b/examples_v2/tuning/bookstore_agent_with_feedback.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Customer Support Agent with Tuning and Feedback Loop +=================================================== + +This example is an enhanced version of `customer_support_agent.py` that adds an +automated feedback loop. It demonstrates how to: + +1. Use `ze.prompt()` to manage prompts +2. Automatically trace OpenAI calls +3. **New**: Use a powerful model (evaluator) to critique the agent's responses +4. **New**: Submit this feedback using `ze.send_feedback()` to improve the prompt over time + +Key concepts: +- `ze.send_feedback()`: Submits programmatic feedback (thumbs up/down, reason) associated with a completion +- Automated Evaluation: Using a stronger model to grade a faster/cheaper model +""" + +import os +from pathlib import Path +import json + +from dotenv import load_dotenv + +# Load environment variables BEFORE importing zeroeval +env_path = Path(__file__).parent.parent / ".env" +load_dotenv(env_path) + +import openai +import zeroeval as ze + +# 1. Initialize ZeroEval +# Ensure you have ZEROEVAL_API_KEY and ZEROEVAL_API_URL set in your environment +ze.init( + api_key=os.getenv("ZEROEVAL_API_KEY"), + api_url=os.getenv("ZEROEVAL_API_URL", "http://localhost:8000"), +) + +# Initialize OpenAI client +client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +def customer_support_agent(user_query: str, user_context: dict = None, conversation_history: list = None): + """ + A simple customer support agent that uses a managed prompt and maintains conversation history. + """ + if user_context is None: + user_context = {} + if conversation_history is None: + conversation_history = [] + + # 2. Define the prompt using ze.prompt() + prompt_name = "bookstore-support-agent-with-sdk-feedback" + + system_instruction = ze.prompt( + name=prompt_name, + content="""You are Elena, a passionate book enthusiast and customer support specialist at Bibliophile Books. You've worked in the bookstore for 5 years and genuinely love helping people discover their next great read. 
+ +Your personality: +- Warm and personable, like chatting with a knowledgeable friend at a bookshop +- Enthusiastic about books and reading +- Patient and empathetic when customers have issues +- Professional but not overly formal +- You use the customer's name naturally in conversation + +Customer Information: +- Name: {{user_name}} +- Membership Level: {{membership}} + +Guidelines: +- Address {{user_name}} directly and warmly +- For Gold members: Remember they have free shipping, priority support, and 15% off all purchases +- For Standard members: Offer helpful service while mentioning Gold membership benefits when relevant +- Keep responses concise but friendly +- If you don't know something or can't help, offer to connect them with a specialist +- Never use placeholder text like "[Your Name]" - you are Elena + +Respond directly to their query in a helpful, personable way.""", + variables={ + "user_name": user_context.get("name", "there"), + "membership": user_context.get("membership", "Standard") + } + ) + + print(f"\n--- Sending Request to AI ({prompt_name}) ---") + + # Build messages with conversation history + messages = [{"role": "system", "content": system_instruction}] + messages.extend(conversation_history) + messages.append({"role": "user", "content": user_query}) + + # 3. Call the Model + # The SDK intercepts this call and tracks the completion_id + response = client.chat.completions.create( + model="gpt-4o-mini", # Use a cost-effective model for the agent + messages=messages, + temperature=0.7 + ) + + completion_text = response.choices[0].message.content + completion_id = response.id + + return completion_text, completion_id, prompt_name + +def evaluate_response(user_query: str, agent_response: str): + """ + Uses a powerful model (Evaluator) to grade the agent's response. + Returns (is_good: bool, reason: str) + """ + print("\n--- Running Evaluator (GPT-4o) ---") + + eval_prompt = f"""You are an expert customer support quality assurance specialist. + Your job is to evaluate a customer support response. + + User Query: "{user_query}" + Agent Response: "{agent_response}" + + Criteria: + 1. Is the tone warm and professional? + 2. Is the information accurate and helpful? + 3. Does it address the user's specific query? + + Output strictly in JSON format with these fields: + - "score": 1 to 5 (5 being perfect) + - "reason": A brief explanation of the score + - "thumbs_up": true if score >= 4, else false + """ + + response = client.chat.completions.create( + model="gpt-4o", # Use a powerful model for evaluation + messages=[{"role": "user", "content": eval_prompt}], + temperature=0, + response_format={"type": "json_object"} + ) + + try: + result = json.loads(response.choices[0].message.content) + return result + except Exception as e: + print(f"Error parsing evaluation: {e}") + return {"thumbs_up": True, "reason": "Failed to parse evaluation", "score": 5} + +def main(): + # Example interaction + print("\n=== Bookstore Support Agent with Feedback Loop (Type 'exit' to quit) ===") + + user_context = { + "name": "Alice", + "membership": "Gold" # VIP customer + } + print(f"Context: User={user_context['name']}, Membership={user_context['membership']}\n") + + conversation_history = [] + + while True: + try: + user_query = input("\nEnter your query: ").strip() + if not user_query: + continue + + if user_query.lower() in ('exit', 'quit'): + print("Goodbye!") + break + + # 1. 
Get response from the agent + response_text, completion_id, prompt_slug = customer_support_agent( + user_query, + user_context, + conversation_history + ) + + print(f"\nElena: {response_text}") + print(f"\n[DEBUG] OpenAI completion_id: {completion_id}") + print(f"[DEBUG] Prompt slug: {prompt_slug}") + + # 2. Generate feedback using a powerful model + # In a real system, this might happen asynchronously or be sampled + eval_result = evaluate_response(user_query, response_text) + + print(f"\n[Evaluator] Score: {eval_result.get('score')}/5") + print(f"[Evaluator] Reason: {eval_result.get('reason')}") + print(f"[Evaluator] Verdict: {'👍 Thumbs Up' if eval_result.get('thumbs_up') else '👎 Thumbs Down'}") + + # 3. Submit feedback to ZeroEval + # This signals to the optimizer which responses were good/bad + ze.send_feedback( + prompt_slug=prompt_slug, + completion_id=completion_id, + thumbs_up=eval_result.get("thumbs_up", True), + reason=eval_result.get("reason"), + metadata={ + "score": eval_result.get("score"), + "evaluator_model": "gpt-4o" + } + ) + print("✓ Feedback submitted to ZeroEval") + + # Add to conversation history + conversation_history.append({"role": "user", "content": user_query}) + conversation_history.append({"role": "assistant", "content": response_text}) + + except KeyboardInterrupt: + print("\nGoodbye!") + break + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() + break + +if __name__ == "__main__": + main() + diff --git a/pyproject.toml b/pyproject.toml index 6961d648..224e382d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zeroeval" -version = "0.6.121" +version = "0.6.122" description = "ZeroEval SDK" readme = "README.md" authors = [ diff --git a/src/zeroeval/__init__.py b/src/zeroeval/__init__.py index 3204c464..8a56f08d 100644 --- a/src/zeroeval/__init__.py +++ b/src/zeroeval/__init__.py @@ -180,6 +180,75 @@ def get(self, slug: str, **kwargs): prompts = _PromptsNamespace() + +def log_completion( + *, + prompt_slug: str, + prompt_id: str, + prompt_version_id: str, + messages: list, + input_text: Optional[str] = None, + output_text: Optional[str] = None, + model_id: Optional[str] = None, + metadata: Optional[dict] = None, + duration_ms: Optional[float] = None, + prompt_tokens: Optional[int] = None, + completion_tokens: Optional[int] = None, + total_tokens: Optional[int] = None, + cost: Optional[float] = None, + has_error: bool = False, + error_message: Optional[str] = None, + span_id: Optional[str] = None, +): + """ + Log a completion for a specific prompt. + + This automatically tracks prompt usage without requiring manual wrapping. + """ + client = _ensure_prompt_client() + return client.log_completion( + prompt_slug=prompt_slug, + prompt_id=prompt_id, + prompt_version_id=prompt_version_id, + messages=messages, + input_text=input_text, + output_text=output_text, + model_id=model_id, + metadata=metadata, + duration_ms=duration_ms, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + cost=cost, + has_error=has_error, + error_message=error_message, + span_id=span_id, + ) + + +def send_feedback( + *, + prompt_slug: str, + completion_id: str, + thumbs_up: bool, + reason: Optional[str] = None, + expected_output: Optional[str] = None, + metadata: Optional[dict] = None, +): + """ + Send feedback for a specific completion. 
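+
+    Illustrative usage (the slug and completion ID below are placeholder
+    values, not real records):
+
+        import zeroeval as ze
+
+        ze.init()
+        ze.send_feedback(
+            prompt_slug="customer-support",
+            completion_id="completion-uuid-123",
+            thumbs_up=True,
+            reason="Helpful and concise",
+        )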
+ """ + client = _ensure_prompt_client() + return client.send_feedback( + prompt_slug=prompt_slug, + completion_id=completion_id, + thumbs_up=thumbs_up, + reason=reason, + expected_output=expected_output, + metadata=metadata, + ) + + # Define what's exported __all__ = [ # Core functionality @@ -192,7 +261,6 @@ def get(self, slug: str, **kwargs): # Providers "ZeroEvalOTLPProvider", "SingleProcessorProvider", - # Observability "tracer", "span", @@ -211,6 +279,9 @@ def get(self, slug: str, **kwargs): "PromptClient", "get_prompt", "prompts", + # Completion logging and feedback + "log_completion", + "send_feedback", ] # Version info diff --git a/src/zeroeval/client.py b/src/zeroeval/client.py index 5b8c6600..72d2a88b 100644 --- a/src/zeroeval/client.py +++ b/src/zeroeval/client.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import re from typing import Any, Dict, Optional, Tuple @@ -17,6 +18,8 @@ _SLUG_RE = re.compile(r"^[a-z0-9-]+$") _TAG_RE = re.compile(r"^[a-z0-9-]+$") +logger = logging.getLogger(__name__) + class ZeroEval: def __init__( @@ -322,4 +325,146 @@ def _post_process( return decorated return prompt + # ---- New Prompt Completion and Feedback API ---- + + def log_completion( + self, + *, + prompt_slug: str, + prompt_id: str, + prompt_version_id: str, + messages: list[dict[str, Any]], + input_text: Optional[str] = None, + output_text: Optional[str] = None, + model_id: Optional[str] = None, + metadata: Optional[dict[str, Any]] = None, + duration_ms: Optional[float] = None, + prompt_tokens: Optional[int] = None, + completion_tokens: Optional[int] = None, + total_tokens: Optional[int] = None, + cost: Optional[float] = None, + has_error: bool = False, + error_message: Optional[str] = None, + span_id: Optional[str] = None, + ) -> dict[str, Any]: + """ + Log a completion for a specific prompt and version. + This is used to track prompt usage automatically. 
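+
+        Illustrative call (the IDs below are placeholder values):
+
+            client.log_completion(
+                prompt_slug="customer-support",
+                prompt_id="prompt-uuid",
+                prompt_version_id="version-uuid",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "Where is my order?"},
+                ],
+                output_text="Your order shipped yesterday.",
+                model_id="gpt-4o-mini",
+                total_tokens=120,
+            )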
+ + Args: + prompt_slug: The slug of the prompt + prompt_id: UUID of the prompt + prompt_version_id: UUID of the prompt version + messages: Array of message objects in OpenAI format + input_text: Optional text representation of input + output_text: Optional text representation of output + model_id: Optional model identifier used + metadata: Optional additional metadata + duration_ms: Optional execution duration in milliseconds + prompt_tokens: Optional number of prompt tokens + completion_tokens: Optional number of completion tokens + total_tokens: Optional total token count + cost: Optional cost in USD + has_error: Whether the completion had an error + error_message: Optional error message + span_id: Optional span ID for trace linking + + Returns: + The created completion record + """ + # Extract project_id from API key context (handled by backend) + url = f"{self._base_url}/projects/{{project_id}}/prompts/{prompt_slug}/completions" + + payload = { + "prompt_id": prompt_id, + "prompt_version_id": prompt_version_id, + "model_id": model_id, + "messages": messages, + "input_text": input_text, + "output_text": output_text, + "metadata": metadata or {}, + "duration_ms": duration_ms, + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": total_tokens, + "cost": cost, + "has_error": has_error, + "error_message": error_message, + "span_id": span_id, + } + + # Remove None values + payload = {k: v for k, v in payload.items() if v is not None} + + resp = requests.post(url, headers=self._headers(), json=payload, timeout=self._timeout) + if resp.status_code >= 400: + raise PromptRequestError( + f"log_completion failed: {resp.text}", status=resp.status_code + ) + return resp.json() + + def send_feedback( + self, + *, + prompt_slug: str, + completion_id: str, + thumbs_up: bool, + reason: Optional[str] = None, + expected_output: Optional[str] = None, + metadata: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: + """ + Send feedback for a specific completion. 
+ + Args: + prompt_slug: The slug of the prompt + completion_id: UUID of the completion to provide feedback on + thumbs_up: True for positive feedback, False for negative + reason: Optional explanation of the feedback + expected_output: Optional description of what the expected output should be + metadata: Optional additional metadata + + Returns: + The created feedback record + """ + url = f"{self._base_url}/v1/prompts/{prompt_slug}/completions/{completion_id}/feedback" + + logger.debug( + f"[SDK] Sending feedback for completion_id={completion_id}, prompt_slug={prompt_slug}", + extra={ + "completion_id": completion_id, + "prompt_slug": prompt_slug, + "thumbs_up": thumbs_up, + "url": url + } + ) + + payload = { + "thumbs_up": thumbs_up, + } + + # Add optional fields only if provided + if reason is not None: + payload["reason"] = reason + if expected_output is not None: + payload["expected_output"] = expected_output + if metadata is not None: + payload["metadata"] = metadata + + resp = requests.post(url, headers=self._headers(), json=payload, timeout=self._timeout) + + logger.debug( + f"[SDK] Feedback response status={resp.status_code}", + extra={ + "status_code": resp.status_code, + "response_text": resp.text[:500] if resp.text else None + } + ) + + if resp.status_code >= 400: + raise PromptRequestError( + f"send_feedback failed: {resp.text}", status=resp.status_code + ) + return resp.json() + diff --git a/src/zeroeval/observability/integrations/openai/integration.py b/src/zeroeval/observability/integrations/openai/integration.py index ba886a6f..dbead92d 100644 --- a/src/zeroeval/observability/integrations/openai/integration.py +++ b/src/zeroeval/observability/integrations/openai/integration.py @@ -67,9 +67,10 @@ def zeroeval_prompt( """ Helper function to create a prompt with zeroeval metadata for tracing and observability. - IMPORTANT: This function does NOT create or update tasks in ZeroEval. It only adds - metadata to OpenAI API calls for tracing purposes. Tasks must be created separately - using Dataset.run() or Experiment.run(). + When this prompt is used in an OpenAI API call, ZeroEval will automatically: + 1. Extract the task metadata from the prompt + 2. Link the span to the specified task + 3. Create the task automatically if it doesn't exist yet Args: name: Required task identifier for this prompt @@ -80,7 +81,6 @@ def zeroeval_prompt( A string with the format: {JSON}content Example: - >>> # This adds metadata but does NOT create a task >>> zeroeval_prompt( ... name="custom-bot-5", ... content="You are an assistant that helps users with {{task}}. Be {{tone}} in your responses.", @@ -92,9 +92,8 @@ def zeroeval_prompt( '{"task": "custom-bot-5", "variables": {"task": "coding questions", "tone": "helpful and concise"}}You are an assistant that helps users with {{task}}. Be {{tone}} in your responses.' Note: - - The 'name' parameter is for linking OpenAI calls to existing tasks - - Tasks are created through Dataset.run() or Experiment.run() - Variables will be interpolated in the prompt when the OpenAI API is called + - The task will be automatically created in ZeroEval if it doesn't exist """ metadata = {"task": name} @@ -348,8 +347,8 @@ def _log_task_metadata(self, task_id: Optional[str], zeroeval_metadata: dict[str if task_id: logger.info( f"{context}: Task ID '{task_id}' added to span attributes. " - f"This enables tracing but does NOT create/update tasks. " - f"Ensure the task exists or will be created through Dataset/Experiment.run()." 
+ f"The task will be automatically created if it doesn't exist yet, " + f"and this span will be linked to it for tracing and tuning." ) logger.debug(f"{context}: Full zeroeval metadata added to span: {zeroeval_metadata}") @@ -391,13 +390,12 @@ def _process_messages_with_zeroeval(self, messages: Optional[list[dict[str, Any] task_id = metadata.get('task') logger.info(f"_process_messages_with_zeroeval: Successfully extracted metadata - task: '{task_id}', variables: {list(variables.keys()) if variables else 'none'}") - # Important warning for users + # Log task linkage info if task_id: - logger.warning( + logger.info( f"_process_messages_with_zeroeval: Task ID '{task_id}' found in zeroeval_prompt. " - f"Note: zeroeval_prompt does NOT automatically create or update tasks. " - f"Tasks must be created separately using Dataset.run() or Experiment.run(). " - f"This metadata is only used for tracing and observability." + f"This span will be automatically linked to the task and the task will be " + f"created if it doesn't exist yet." ) else: logger.debug("_process_messages_with_zeroeval: No zeroeval metadata found in system message") diff --git a/src/zeroeval/pyproject.toml b/src/zeroeval/pyproject.toml index ae45880e..a913a9c3 100644 --- a/src/zeroeval/pyproject.toml +++ b/src/zeroeval/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zeroeval" -version = "0.6.121" +version = "0.6.122" description = "ZeroEval SDK" [project.scripts] diff --git a/test_sampling_correct.py b/test_sampling_correct.py deleted file mode 100644 index 1318dd81..00000000 --- a/test_sampling_correct.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python3 -"""Test script to verify sampling functionality works correctly.""" - -import os -import sys -import random -from pathlib import Path - -# Add the src directory to the path -sys.path.insert(0, str(Path(__file__).parent / 'src')) - -# Set seed for reproducible tests -random.seed(42) - -def test_sampling_rate(rate, num_traces=1000): - """Test sampling at a specific rate.""" - print(f"\n=== Testing Sampling Rate: {rate*100}% ({num_traces} traces) ===") - - # Set environment variable before importing - os.environ["ZEROEVAL_SAMPLING_RATE"] = str(rate) - - # Force reimport to get fresh tracer instance - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - Tracer._instance = None - - # Reimport and initialize - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Track sampled traces - sampled_count = 0 - total_count = num_traces - - for i in range(total_count): - # Create a new trace each time - span = zeroeval.tracer.start_span(f"test_trace_{i}", is_new_trace=True) - trace_id = span.trace_id - - # Check if this trace is sampled BEFORE ending the span - # (since the trace gets cleaned up after the last span ends) - if trace_id in zeroeval.tracer._traces: - if zeroeval.tracer._traces[trace_id].is_sampled: - sampled_count += 1 - - # End the span properly to clean up - zeroeval.tracer.end_span(span) - - # After all traces, check that spans were buffered for sampled traces - # The actual number of spans in buffer depends on flushing behavior - spans_buffered = len(zeroeval.tracer._spans) - - # Calculate actual sampling rate - actual_rate = sampled_count / total_count - expected_rate = rate - - # Allow for some statistical variance (±5% absolute difference for small samples) - # For larger samples, use tighter bounds - if num_traces >= 500: - tolerance = 0.05 - else: - tolerance = 0.1 # More 
tolerance for smaller samples - - is_within_tolerance = abs(actual_rate - expected_rate) <= tolerance - - print(f" Expected rate: {expected_rate*100:.1f}%") - print(f" Actual rate: {actual_rate*100:.1f}% ({sampled_count}/{total_count} traces)") - print(f" Within tolerance (±{tolerance*100}%): {'✅ YES' if is_within_tolerance else '❌ NO'}") - - # Check for memory leaks - all traces should be cleaned up - remaining_traces = len(zeroeval.tracer._traces) - print(f" Memory check - remaining traces: {remaining_traces} {'✅' if remaining_traces == 0 else '❌ MEMORY LEAK!'}") - - # For sampled traces, we should have spans buffered (unless auto-flushed) - if expected_rate > 0: - print(f" Spans buffered: {spans_buffered} (may vary due to auto-flush)") - - # Force flush to clear buffer for next test - zeroeval.tracer.flush() - - return is_within_tolerance, actual_rate, remaining_traces == 0 - - -def test_trace_completeness(): - """Test that all spans in a trace follow the same sampling decision.""" - print("\n=== Testing Trace Completeness ===") - - os.environ["ZEROEVAL_SAMPLING_RATE"] = "0.5" # 50% sampling - - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - Tracer._instance = None - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Test 10 traces with multiple spans each - traces_sampled = [] - - for i in range(10): - # Start root span (new trace) - root = zeroeval.tracer.start_span(f"root_{i}", is_new_trace=True) - trace_id = root.trace_id - - # Check sampling decision while trace is active - is_sampled = zeroeval.tracer._traces[trace_id].is_sampled - traces_sampled.append(is_sampled) - - # Create child spans in same trace - child1 = zeroeval.tracer.start_span(f"child1_{i}") - child2 = zeroeval.tracer.start_span(f"child2_{i}") - - # All spans in trace should have same sampling decision - assert child1.trace_id == trace_id, "Child should have same trace ID" - assert child2.trace_id == trace_id, "Child should have same trace ID" - assert zeroeval.tracer._traces[trace_id].is_sampled == is_sampled, "Sampling decision changed!" - assert zeroeval.tracer._traces[trace_id].ref_count == 3, f"Expected ref_count=3, got {zeroeval.tracer._traces[trace_id].ref_count}" - - # End spans in reverse order (LIFO) - zeroeval.tracer.end_span(child2) - assert zeroeval.tracer._traces[trace_id].ref_count == 2, "ref_count should be 2 after ending one child" - - zeroeval.tracer.end_span(child1) - assert zeroeval.tracer._traces[trace_id].ref_count == 1, "ref_count should be 1 after ending both children" - - zeroeval.tracer.end_span(root) - - # After ending all spans, trace should be cleaned up - assert trace_id not in zeroeval.tracer._traces, f"Trace {trace_id} not cleaned up!" 
- - sampled = sum(traces_sampled) - print(f" Traces sampled: {sampled}/10") - print(f" All spans in each trace had consistent sampling: ✅") - print(f" Reference counting worked correctly: ✅") - print(f" All traces cleaned up after completion: ✅") - - # Force flush - zeroeval.tracer.flush() - - -def test_nested_spans_cleanup(): - """Test that nested spans are properly cleaned up even when unsampled.""" - print("\n=== Testing Nested Spans Cleanup ===") - - os.environ["ZEROEVAL_SAMPLING_RATE"] = "0" # Sample nothing - - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - Tracer._instance = None - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Create deeply nested spans - spans = [] - for i in range(5): - span = zeroeval.tracer.start_span(f"level_{i}", is_new_trace=(i == 0)) - spans.append(span) - - trace_id = spans[0].trace_id - - # Verify trace is not sampled - assert not zeroeval.tracer._traces[trace_id].is_sampled, "Trace should not be sampled with rate=0" - assert zeroeval.tracer._traces[trace_id].ref_count == 5, "Should have ref_count=5 for 5 active spans" - - # Check active spans stack has all 5 spans - stack = zeroeval.tracer._active_spans_ctx.get() - assert len(stack) == 5, f"Expected 5 spans in stack, got {len(stack)}" - - # End all spans in reverse order - for span in reversed(spans): - zeroeval.tracer.end_span(span) - - # Verify stack is empty - stack = zeroeval.tracer._active_spans_ctx.get() - assert len(stack) == 0, f"Stack should be empty, but has {len(stack)} spans" - - # Verify trace is cleaned up - assert trace_id not in zeroeval.tracer._traces, "Unsampled trace not cleaned up" - assert len(zeroeval.tracer._traces) == 0, f"Memory leak: {len(zeroeval.tracer._traces)} traces remain" - assert len(zeroeval.tracer._spans) == 0, f"No spans should be buffered for unsampled traces, but found {len(zeroeval.tracer._spans)}" - - print(f" Unsampled nested spans: Created 5 levels") - print(f" Stack properly cleaned: ✅") - print(f" Reference counting correct: ✅") - print(f" Trace properly cleaned: ✅") - print(f" No spans buffered: ✅") - - -def main(): - print("Testing ZeroEval Sampling Functionality") - print("="*50) - - all_passed = True - - # Test different sampling rates - rates_to_test = [0.0, 0.1, 0.25, 0.5, 0.75, 1.0] - for rate in rates_to_test: - passed, actual, no_leak = test_sampling_rate(rate, num_traces=500) - all_passed = all_passed and passed and no_leak - - # Test trace completeness - test_trace_completeness() - - # Test cleanup of unsampled spans - test_nested_spans_cleanup() - - print("\n" + "="*50) - if all_passed: - print("✅ All sampling tests PASSED!") - else: - print("❌ Some sampling tests FAILED - check the output above") - - return 0 if all_passed else 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test_sampling_debug.py b/test_sampling_debug.py deleted file mode 100644 index 064eae7e..00000000 --- a/test_sampling_debug.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -"""Debug script to understand sampling issue.""" - -import os -import sys -from pathlib import Path - -# Add the src directory to the path -sys.path.insert(0, str(Path(__file__).parent / 'src')) - -def test_single_trace(): - """Test a single trace to debug the issue.""" - - # Set environment variable before importing - os.environ["ZEROEVAL_SAMPLING_RATE"] = "1.0" - - # Import fresh - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - 
Tracer._instance = None - - # Initialize - zeroeval.init(api_key="test_key", debug=True) - - print(f"Tracer sampling rate: {zeroeval.tracer._sampling_rate}") - print(f"Tracer instance: {zeroeval.tracer}") - - # Create a single span/trace - span = zeroeval.tracer.start_span("test_trace", is_new_trace=True) - trace_id = span.trace_id - - print(f"Trace ID: {trace_id}") - print(f"Traces registry: {zeroeval.tracer._traces}") - - if trace_id in zeroeval.tracer._traces: - trace_info = zeroeval.tracer._traces[trace_id] - print(f"Trace is_sampled: {trace_info.is_sampled}") - print(f"Trace ref_count: {trace_info.ref_count}") - else: - print("ERROR: Trace not in registry!") - - # End the span - zeroeval.tracer.end_span(span) - - print(f"After ending span, traces registry: {zeroeval.tracer._traces}") - print(f"Buffered spans: {len(zeroeval.tracer._spans)}") - - -if __name__ == "__main__": - test_single_trace() diff --git a/test_sampling_fix.py b/test_sampling_fix.py deleted file mode 100644 index a9fa7e2f..00000000 --- a/test_sampling_fix.py +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env python3 -"""Test script to verify sampling functionality works correctly.""" - -import os -import sys -import random -from pathlib import Path - -# Add the src directory to the path -sys.path.insert(0, str(Path(__file__).parent / 'src')) - -# Set seed for reproducible tests -random.seed(42) - -def test_sampling_rate(rate, num_traces=1000): - """Test sampling at a specific rate.""" - print(f"\n=== Testing Sampling Rate: {rate*100}% ({num_traces} traces) ===") - - # Set environment variable before importing - os.environ["ZEROEVAL_SAMPLING_RATE"] = str(rate) - - # Force reimport to get fresh tracer instance - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - Tracer._instance = None - - # Reimport and initialize - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Track sampled traces - sampled_count = 0 - total_count = num_traces - - for i in range(total_count): - # Create a new trace each time - span = zeroeval.tracer.start_span(f"test_trace_{i}", is_new_trace=True) - trace_id = span.trace_id - - # Check if this trace is sampled - if trace_id in zeroeval.tracer._traces: - if zeroeval.tracer._traces[trace_id].is_sampled: - sampled_count += 1 - - # End the span properly to clean up - zeroeval.tracer.end_span(span) - - # Calculate actual sampling rate - actual_rate = sampled_count / total_count - expected_rate = rate - - # Allow for some statistical variance (±5% absolute difference) - tolerance = 0.05 - is_within_tolerance = abs(actual_rate - expected_rate) <= tolerance - - print(f" Expected rate: {expected_rate*100:.1f}%") - print(f" Actual rate: {actual_rate*100:.1f}% ({sampled_count}/{total_count} traces)") - print(f" Within tolerance (±{tolerance*100}%): {'✅ YES' if is_within_tolerance else '❌ NO'}") - - # Check for memory leaks - all traces should be cleaned up - remaining_traces = len(zeroeval.tracer._traces) - print(f" Memory check - remaining traces: {remaining_traces} {'✅' if remaining_traces == 0 else '❌ MEMORY LEAK!'}") - - return is_within_tolerance, actual_rate, remaining_traces == 0 - - -def test_trace_completeness(): - """Test that all spans in a trace follow the same sampling decision.""" - print("\n=== Testing Trace Completeness ===") - - os.environ["ZEROEVAL_SAMPLING_RATE"] = "0.5" # 50% sampling - - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - 
Tracer._instance = None - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Test 10 traces with multiple spans each - traces_sampled = [] - - for i in range(10): - # Start root span (new trace) - root = zeroeval.tracer.start_span(f"root_{i}", is_new_trace=True) - trace_id = root.trace_id - is_sampled = zeroeval.tracer._traces[trace_id].is_sampled - traces_sampled.append(is_sampled) - - # Create child spans in same trace - child1 = zeroeval.tracer.start_span(f"child1_{i}") - child2 = zeroeval.tracer.start_span(f"child2_{i}") - - # All spans in trace should have same sampling decision - assert child1.trace_id == trace_id, "Child should have same trace ID" - assert child2.trace_id == trace_id, "Child should have same trace ID" - assert zeroeval.tracer._traces[trace_id].is_sampled == is_sampled, "Sampling decision changed!" - - # End spans in reverse order (LIFO) - zeroeval.tracer.end_span(child2) - zeroeval.tracer.end_span(child1) - zeroeval.tracer.end_span(root) - - # After ending all spans, trace should be cleaned up - assert trace_id not in zeroeval.tracer._traces, f"Trace {trace_id} not cleaned up!" - - sampled = sum(traces_sampled) - print(f" Traces sampled: {sampled}/10") - print(f" All spans in each trace had consistent sampling: ✅") - print(f" All traces cleaned up after completion: ✅") - - -def test_nested_spans_cleanup(): - """Test that nested spans are properly cleaned up even when unsampled.""" - print("\n=== Testing Nested Spans Cleanup ===") - - os.environ["ZEROEVAL_SAMPLING_RATE"] = "0" # Sample nothing - - import importlib - import zeroeval - from zeroeval.observability.tracer import Tracer - - # Reset singleton - Tracer._instance = None - importlib.reload(zeroeval) - zeroeval.init(api_key="test_key", debug=False) - - # Create deeply nested spans - spans = [] - for i in range(5): - span = zeroeval.tracer.start_span(f"level_{i}", is_new_trace=(i == 0)) - spans.append(span) - - trace_id = spans[0].trace_id - - # Verify trace is not sampled - assert not zeroeval.tracer._traces[trace_id].is_sampled, "Trace should not be sampled with rate=0" - - # Check active spans stack has all 5 spans - stack = zeroeval.tracer._active_spans_ctx.get() - assert len(stack) == 5, f"Expected 5 spans in stack, got {len(stack)}" - - # End all spans in reverse order - for span in reversed(spans): - zeroeval.tracer.end_span(span) - - # Verify stack is empty - stack = zeroeval.tracer._active_spans_ctx.get() - assert len(stack) == 0, f"Stack should be empty, but has {len(stack)} spans" - - # Verify trace is cleaned up - assert trace_id not in zeroeval.tracer._traces, "Unsampled trace not cleaned up" - assert len(zeroeval.tracer._traces) == 0, f"Memory leak: {len(zeroeval.tracer._traces)} traces remain" - assert len(zeroeval.tracer._spans) == 0, f"No spans should be buffered for unsampled traces, but found {len(zeroeval.tracer._spans)}" - - print(f" Unsampled nested spans: Created 5 levels") - print(f" Stack properly cleaned: ✅") - print(f" Trace properly cleaned: ✅") - print(f" No spans buffered: ✅") - - -def main(): - print("Testing ZeroEval Sampling Functionality") - print("="*50) - - all_passed = True - - # Test different sampling rates - rates_to_test = [0.0, 0.1, 0.25, 0.5, 0.75, 1.0] - for rate in rates_to_test: - passed, actual, no_leak = test_sampling_rate(rate, num_traces=500) - all_passed = all_passed and passed and no_leak - - # Test trace completeness - test_trace_completeness() - - # Test cleanup of unsampled spans - test_nested_spans_cleanup() - - print("\n" + 
"="*50) - if all_passed: - print("✅ All sampling tests PASSED!") - else: - print("❌ Some sampling tests FAILED - check the output above") - - return 0 if all_passed else 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tests/test_client_feedback.py b/tests/test_client_feedback.py new file mode 100644 index 00000000..ff5eb6fe --- /dev/null +++ b/tests/test_client_feedback.py @@ -0,0 +1,203 @@ +"""Tests for ZeroEval client feedback functionality.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from zeroeval.client import ZeroEval +from zeroeval.errors import PromptRequestError + + +@pytest.fixture +def client(): + """Create a ZeroEval client for testing.""" + return ZeroEval(api_key="test-api-key", base_url="https://api.test.com") + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_success(mock_post, client): + """Test successful feedback submission.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "feedback-123", + "completion_id": "completion-456", + "prompt_id": "prompt-789", + "prompt_version_id": "version-abc", + "project_id": "project-def", + "thumbs_up": True, + "reason": "Great response", + "expected_output": None, + "metadata": {}, + "created_by": "user-123", + "created_at": "2025-01-01T00:00:00Z", + "updated_at": "2025-01-01T00:00:00Z", + } + mock_post.return_value = mock_response + + result = client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-456", + thumbs_up=True, + reason="Great response", + ) + + # Verify the request was made correctly + mock_post.assert_called_once() + call_args = mock_post.call_args + + # Check URL + assert call_args[0][0] == "https://api.test.com/v1/prompts/test-prompt/completions/completion-456/feedback" + + # Check headers + headers = call_args[1]["headers"] + assert headers["Authorization"] == "Bearer test-api-key" + assert headers["Content-Type"] == "application/json" + + # Check payload + payload = call_args[1]["json"] + assert payload["thumbs_up"] is True + assert payload["reason"] == "Great response" + assert "expected_output" not in payload # Not included when None + assert "metadata" not in payload # Not included when None + + # Check response + assert result["id"] == "feedback-123" + assert result["thumbs_up"] is True + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_negative_with_expected_output(mock_post, client): + """Test negative feedback with expected output.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "feedback-456", + "completion_id": "completion-789", + "thumbs_up": False, + "reason": "Incorrect format", + "expected_output": "Should be JSON", + } + mock_post.return_value = mock_response + + result = client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-789", + thumbs_up=False, + reason="Incorrect format", + expected_output="Should be JSON", + ) + + # Check payload includes all fields + payload = mock_post.call_args[1]["json"] + assert payload["thumbs_up"] is False + assert payload["reason"] == "Incorrect format" + assert payload["expected_output"] == "Should be JSON" + + assert result["id"] == "feedback-456" + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_with_metadata(mock_post, client): + """Test feedback submission with custom metadata.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "feedback-789", + 
"thumbs_up": True, + "metadata": {"source": "automated", "version": "1.0"}, + } + mock_post.return_value = mock_response + + result = client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-abc", + thumbs_up=True, + metadata={"source": "automated", "version": "1.0"}, + ) + + # Check metadata is included + payload = mock_post.call_args[1]["json"] + assert payload["metadata"] == {"source": "automated", "version": "1.0"} + + assert result["metadata"]["source"] == "automated" + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_minimal(mock_post, client): + """Test feedback with only required fields.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "feedback-minimal", + "thumbs_up": True, + } + mock_post.return_value = mock_response + + result = client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-xyz", + thumbs_up=True, + ) + + # Check only thumbs_up is in payload + payload = mock_post.call_args[1]["json"] + assert payload == {"thumbs_up": True} + + assert result["id"] == "feedback-minimal" + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_404_error(mock_post, client): + """Test feedback submission when completion not found.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Completion not found" + mock_post.return_value = mock_response + + with pytest.raises(PromptRequestError) as exc_info: + client.send_feedback( + prompt_slug="test-prompt", + completion_id="nonexistent", + thumbs_up=True, + ) + + assert "send_feedback failed" in str(exc_info.value) + assert "404" in str(exc_info.value.status) + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_500_error(mock_post, client): + """Test feedback submission with server error.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal server error" + mock_post.return_value = mock_response + + with pytest.raises(PromptRequestError) as exc_info: + client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-123", + thumbs_up=False, + reason="Test", + ) + + assert "send_feedback failed" in str(exc_info.value) + assert "500" in str(exc_info.value.status) + + +@patch("zeroeval.client.requests.post") +def test_send_feedback_timeout(mock_post, client): + """Test feedback submission handles timeout correctly.""" + mock_post.side_effect = Exception("Connection timeout") + + with pytest.raises(Exception) as exc_info: + client.send_feedback( + prompt_slug="test-prompt", + completion_id="completion-123", + thumbs_up=True, + ) + + assert "timeout" in str(exc_info.value).lower() +