Skip to content

API Examples

Ryan Robson edited this page Sep 16, 2025 · 2 revisions

🔌 API Examples & Recipes

Complete examples and code recipes for integrating Inferno into your applications.

🎯 Quick Start Examples

Basic Chat Completion

import openai

# Inferno speaks the OpenAI wire protocol, so the stock SDK works unchanged.
client = openai.OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="not-needed",  # placeholder; only matters when auth is enabled
)

# One-shot chat completion: a system prompt plus a single user turn.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What are the benefits of running AI locally?"},
]
completion = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=conversation,
    max_tokens=500,
    temperature=0.7,
)

print(completion.choices[0].message.content)

Streaming Response

# Stream the completion token-by-token instead of waiting for the full reply.
streamed = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=[{"role": "user", "content": "Write a short story about AI"}],
    stream=True,
    max_tokens=1000
)

# Some deltas arrive with no content (None), so guard before printing.
for event in streamed:
    token = event.choices[0].delta.content
    if token is not None:
        print(token, end="")

💻 Language-Specific Examples

Python Integration

Using OpenAI Client Library

import openai
import asyncio
from typing import List, Dict

class InfernoClient:
    """Thin convenience wrapper around an OpenAI-compatible Inferno server."""

    def __init__(self, base_url: str = "http://localhost:8080/v1"):
        # The stock OpenAI SDK works unchanged; the key is a placeholder.
        self.client = openai.OpenAI(base_url=base_url, api_key="not-needed")

    def chat(self, messages: List[Dict[str, str]], model: str = "llama-2-7b-chat"):
        """Run a plain chat completion over the given message list."""
        return self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
            max_tokens=500,
        )

    def code_generation(self, prompt: str, language: str = "python"):
        """Ask the code model for code, keeping the temperature low."""
        system_prompt = f"You are an expert {language} programmer. Write clean, efficient code."
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        return self.client.chat.completions.create(
            model="codellama-13b",
            messages=conversation,
            temperature=0.1,  # near-deterministic output suits code
            max_tokens=1000,
        )

    def summarize_document(self, text: str, max_summary_length: int = 200):
        """Condense *text* to at most ``max_summary_length`` words."""
        prompt = f"Summarize the following text in {max_summary_length} words or less:\n\n{text}"
        return self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=max_summary_length * 2,  # rough words-to-tokens headroom
        )

# Usage examples: each call below issues a blocking HTTP request, so a local
# Inferno server must be running.
inferno = InfernoClient()

# Chat example: returns the full OpenAI-style completion object.
response = inferno.chat([
    {"role": "user", "content": "Explain quantum computing in simple terms"}
])
print(response.choices[0].message.content)

# Code generation example: routed to the codellama model by the wrapper.
code_response = inferno.code_generation(
    "Create a Python function that calculates the Fibonacci sequence",
    "python"
)
print(code_response.choices[0].message.content)

Using Requests Library

import requests
import json

def call_inferno_api(prompt: str, model: str = "llama-2-7b-chat",
                     timeout: float = 30.0) -> str:
    """Call Inferno's chat-completion endpoint directly over HTTP.

    Args:
        prompt: The user message to send.
        model: Model identifier registered with the Inferno server.
        timeout: Seconds to wait for the HTTP response (new parameter,
            defaults to 30 so existing callers are unaffected).

    Returns:
        The assistant's reply text.

    Raises:
        Exception: If the server answers with a non-200 status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "http://localhost:8080/v1/chat/completions"

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 500,
        "temperature": 0.7,
        "stream": False
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer not-needed"  # If auth enabled, use real token
    }

    # A timeout is essential: without one, requests.post can block forever
    # if the server accepts the connection but never responds.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    else:
        raise Exception(f"API call failed: {response.status_code} - {response.text}")

# Usage: single prompt in, reply text out (raises on non-200 responses).
result = call_inferno_api("What are the main benefits of renewable energy?")
print(result)

JavaScript/Node.js Integration

Using OpenAI SDK

// The official OpenAI SDK pointed at a local Inferno server.
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'not-needed', // Unless authentication is enabled
});

// Simple chat completion
// Send one user message and resolve with the assistant's full reply text.
async function chatWithInferno(message) {
  const params = {
    model: 'llama-2-7b-chat',
    messages: [{ role: 'user', content: message }],
    max_tokens: 500,
    temperature: 0.7,
  };
  const completion = await openai.chat.completions.create(params);
  const [firstChoice] = completion.choices;
  return firstChoice.message.content;
}

// Streaming example
// Stream a completion, writing each token to stdout as it arrives.
async function streamingChat(message) {
  const stream = await openai.chat.completions.create({
    model: 'llama-2-7b-chat',
    messages: [{ role: 'user', content: message }],
    stream: true,
    max_tokens: 1000,
  });

  for await (const chunk of stream) {
    const token = chunk.choices[0]?.delta?.content;
    process.stdout.write(token || '');
  }
}

// Usage: promise-based call that logs the complete reply once finished.
chatWithInferno("Explain the concept of machine learning")
  .then(response => console.log(response))
  .catch(error => console.error(error));

// Fire-and-forget: tokens are written to stdout as they stream in.
streamingChat("Write a poem about technology");

Using Fetch API

// Minimal dependency-free call to Inferno's chat endpoint via fetch().
async function infernoRequest(prompt, model = 'llama-2-7b-chat') {
  const payload = {
    model: model,
    messages: [{ role: 'user', content: prompt }],
    max_tokens: 500,
    temperature: 0.7
  };

  const response = await fetch('http://localhost:8080/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': 'Bearer not-needed'
    },
    body: JSON.stringify(payload)
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data = await response.json();
  return data.choices[0].message.content;
}

// Usage: resolves to the reply text, rejects on HTTP or network errors.
infernoRequest("What is the future of artificial intelligence?")
  .then(result => console.log(result))
  .catch(error => console.error('Error:', error));

Rust Integration

use reqwest;
use serde_json::{json, Value};
use tokio;

/// Minimal handle for talking to an Inferno server over HTTP.
#[derive(Debug)]
struct InfernoClient {
    base_url: String,        // server root, e.g. "http://localhost:8080"
    client: reqwest::Client, // shared HTTP client reused for every request
}

impl InfernoClient {
    fn new(base_url: &str) -> Self {
        Self {
            base_url: base_url.to_string(),
            client: reqwest::Client::new(),
        }
    }

    async fn chat_completion(
        &self,
        prompt: &str,
        model: &str,
    ) -> Result<String, Box<dyn std::error::Error>> {
        let url = format!("{}/v1/chat/completions", self.base_url);

        let payload = json!({
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 500,
            "temperature": 0.7
        });

        let response = self
            .client
            .post(&url)
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        let json: Value = response.json().await?;

        Ok(json["choices"][0]["message"]["content"]
            .as_str()
            .unwrap_or("")
            .to_string())
    }
}

// Demo entry point: one request against a local Inferno server.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = InfernoClient::new("http://localhost:8080");

    // Transport/JSON errors propagate to main's Result via `?`.
    let response = client
        .chat_completion("Explain Rust's ownership system", "llama-2-7b-chat")
        .await?;

    println!("Response: {}", response);

    Ok(())
}

Go Integration

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "net/http"
)

// ChatMessage is one turn in an OpenAI-style conversation.
type ChatMessage struct {
    Role    string `json:"role"`
    Content string `json:"content"`
}

// ChatRequest is the JSON body sent to /v1/chat/completions.
type ChatRequest struct {
    Model     string        `json:"model"`
    Messages  []ChatMessage `json:"messages"`
    MaxTokens int          `json:"max_tokens"`
    Temperature float64    `json:"temperature"`
}

// ChatChoice wraps a single candidate reply in the server response.
type ChatChoice struct {
    Message ChatMessage `json:"message"`
}

// ChatResponse mirrors only the response fields this example reads.
type ChatResponse struct {
    Choices []ChatChoice `json:"choices"`
}

// InfernoClient bundles the server base URL with a reusable HTTP client.
type InfernoClient struct {
    BaseURL string
    Client  *http.Client
}

// NewInfernoClient constructs a client for the Inferno server at baseURL.
func NewInfernoClient(baseURL string) *InfernoClient {
    c := &InfernoClient{BaseURL: baseURL}
    c.Client = &http.Client{}
    return c
}

// ChatCompletion sends a single-turn chat request to the Inferno server and
// returns the assistant's reply text.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a non-2xx status, or when the response body cannot be
// decoded or contains no choices.
func (c *InfernoClient) ChatCompletion(prompt, model string) (string, error) {
    request := ChatRequest{
        Model: model,
        Messages: []ChatMessage{
            {Role: "user", Content: prompt},
        },
        MaxTokens:   500,
        Temperature: 0.7,
    }

    jsonData, err := json.Marshal(request)
    if err != nil {
        return "", err
    }

    url := fmt.Sprintf("%s/v1/chat/completions", c.BaseURL)
    req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
    if err != nil {
        return "", err
    }

    req.Header.Set("Content-Type", "application/json")

    resp, err := c.Client.Do(req)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    // NOTE(review): ioutil.ReadAll is deprecated since Go 1.16; switch to
    // io.ReadAll once the module's minimum Go version allows it.
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return "", err
    }

    // Fail fast on HTTP errors instead of trying to unmarshal an error page
    // (previously a 4xx/5xx body was silently decoded as an empty response).
    if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        return "", fmt.Errorf("chat completion failed: %s: %s", resp.Status, body)
    }

    var chatResp ChatResponse
    err = json.Unmarshal(body, &chatResp)
    if err != nil {
        return "", err
    }

    if len(chatResp.Choices) > 0 {
        return chatResp.Choices[0].Message.Content, nil
    }

    return "", fmt.Errorf("no response choices")
}

// Demo entry point: issues one completion request and prints the reply.
func main() {
    client := NewInfernoClient("http://localhost:8080")

    response, err := client.ChatCompletion(
        "What are the advantages of Go programming language?",
        "llama-2-7b-chat",
    )
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }

    fmt.Printf("Response: %s\n", response)
}

πŸ—οΈ Application Integration Patterns

Document Processing Pipeline

import openai
from typing import List
import asyncio

class DocumentProcessor:
    """Runs four LLM analyses (summary, key points, sentiment, questions)
    over a single document via a local Inferno server."""

    def __init__(self, inferno_url: str = "http://localhost:8080/v1"):
        # Synchronous OpenAI client; the async methods below wrap sync calls.
        self.client = openai.OpenAI(base_url=inferno_url, api_key="not-needed")

    async def process_document(self, text: str) -> dict:
        """Complete document processing pipeline.

        Returns a dict keyed by task name; a failed task maps to an
        "Error: ..." string instead of raising.
        """
        # NOTE(review): the coroutines are awaited one at a time below and the
        # underlying client calls are synchronous, so these "tasks" run
        # sequentially, not concurrently.
        tasks = {
            'summary': self.summarize(text),
            'key_points': self.extract_key_points(text),
            'sentiment': self.analyze_sentiment(text),
            'questions': self.generate_questions(text)
        }

        results = {}
        for task_name, task in tasks.items():
            try:
                results[task_name] = await task
            except Exception as e:
                # Keep the pipeline going even if one analysis fails.
                results[task_name] = f"Error: {str(e)}"

        return results

    async def summarize(self, text: str) -> str:
        """Return a 3-5 sentence summary of *text*."""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "user",
                "content": f"Summarize this document in 3-5 sentences:\n\n{text}"
            }],
            temperature=0.3,
            max_tokens=200
        )
        return response.choices[0].message.content

    async def extract_key_points(self, text: str) -> str:
        """Return the five most important points from *text*."""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "user",
                "content": f"Extract the 5 most important key points from this text:\n\n{text}"
            }],
            temperature=0.2,
            max_tokens=300
        )
        return response.choices[0].message.content

    async def analyze_sentiment(self, text: str) -> str:
        """Classify the sentiment of *text* (positive/negative/neutral)."""
        response = self.client.chat.completions.create(
            model="llama-2-7b-chat",
            messages=[{
                "role": "user",
                "content": f"Analyze the sentiment of this text (positive, negative, neutral):\n\n{text}"
            }],
            temperature=0.1,
            max_tokens=100
        )
        return response.choices[0].message.content

    async def generate_questions(self, text: str) -> str:
        """Return three discussion questions grounded in *text*."""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "user",
                "content": f"Generate 3 thoughtful questions based on this text:\n\n{text}"
            }],
            temperature=0.5,
            max_tokens=200
        )
        return response.choices[0].message.content

# Usage
async def main():
    """Run the full pipeline on a sample document and print each analysis."""
    processor = DocumentProcessor()

    document = """
    Artificial Intelligence has transformed numerous industries over the past decade.
    From healthcare diagnostics to autonomous vehicles, AI applications continue to expand.
    However, concerns about privacy, job displacement, and algorithmic bias remain significant.
    The future of AI development will likely focus on addressing these challenges while
    maximizing the technology's benefits for society.
    """

    results = await processor.process_document(document)

    for task, result in results.items():
        print(f"\n{task.upper()}:")
        print(result)

# Run the example
asyncio.run(main())

Code Review Assistant

import openai

class CodeReviewAssistant:
    """Drives four codellama review passes over a snippet of source code."""

    def __init__(self, inferno_url: str = "http://localhost:8080/v1"):
        self.client = openai.OpenAI(base_url=inferno_url, api_key="not-needed")

    def review_code(self, code: str, language: str = "python") -> dict:
        """Run every review pass and return the feedback keyed by pass name."""
        return {
            'security_review': self.security_review(code, language),
            'performance_review': self.performance_review(code, language),
            'style_review': self.style_review(code, language),
            'suggestions': self.improvement_suggestions(code, language)
        }

    def _ask(self, messages, temperature, max_tokens):
        # All passes share one call path and the same code-tuned model.
        completion = self.client.chat.completions.create(
            model="codellama-13b",
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return completion.choices[0].message.content

    def security_review(self, code: str, language: str) -> str:
        """Ask for a vulnerability-focused review of *code*."""
        return self._ask(
            [{
                "role": "system",
                "content": "You are a security expert. Review code for security vulnerabilities."
            }, {
                "role": "user",
                "content": f"Review this {language} code for security issues:\n\n```{language}\n{code}\n```"
            }],
            temperature=0.1,
            max_tokens=500,
        )

    def performance_review(self, code: str, language: str) -> str:
        """Ask for bottleneck-focused feedback on *code*."""
        return self._ask(
            [{
                "role": "system",
                "content": "You are a performance optimization expert. Identify performance bottlenecks."
            }, {
                "role": "user",
                "content": f"Analyze this {language} code for performance issues:\n\n```{language}\n{code}\n```"
            }],
            temperature=0.1,
            max_tokens=500,
        )

    def style_review(self, code: str, language: str) -> str:
        """Ask for style and best-practice feedback on *code*."""
        return self._ask(
            [{
                "role": "system",
                "content": f"You are a {language} expert. Review code style and best practices."
            }, {
                "role": "user",
                "content": f"Review this {language} code for style and best practices:\n\n```{language}\n{code}\n```"
            }],
            temperature=0.2,
            max_tokens=500,
        )

    def improvement_suggestions(self, code: str, language: str) -> str:
        """Ask for concrete refactoring suggestions for *code*."""
        return self._ask(
            [{
                "role": "user",
                "content": f"Suggest improvements for this {language} code:\n\n```{language}\n{code}\n```"
            }],
            temperature=0.3,
            max_tokens=600,
        )

# Usage example
reviewer = CodeReviewAssistant()

# Deliberately flawed snippet so each review pass has something to flag.
sample_code = """
def process_user_data(user_input):
    # Process user data
    data = eval(user_input)  # Security issue!

    result = []
    for i in range(len(data)):  # Performance issue
        if data[i] > 0:
            result.append(data[i] * 2)

    return result
"""

review = reviewer.review_code(sample_code, "python")

for category, feedback in review.items():
    print(f"\n{category.upper().replace('_', ' ')}:")
    print(feedback)

Customer Support Automation

import openai
from datetime import datetime
import json

class SupportBot:
    """Classifies incoming support messages and drafts replies via Inferno.

    Urgent requests are routed to the larger 13B model with a stricter
    system prompt; every interaction is appended to an in-memory log.
    """

    def __init__(self, inferno_url: str = "http://localhost:8080/v1"):
        self.client = openai.OpenAI(base_url=inferno_url, api_key="not-needed")
        # In-memory interaction log; a real deployment would persist this.
        self.conversation_history = []

    def handle_support_request(self, user_message: str, context: dict = None) -> dict:
        """Process support request and return structured response.

        Returns a dict with the reply text, the classification dict, an ISO
        timestamp, and an `escalate` flag for human hand-off.
        """

        # Classify the request
        classification = self.classify_request(user_message)

        # Generate response based on classification
        if classification['urgent']:
            response = self.handle_urgent_request(user_message, context)
        else:
            response = self.handle_standard_request(user_message, context)

        # Log the interaction
        self.log_interaction(user_message, response, classification)

        return {
            'response': response,
            'classification': classification,
            'timestamp': datetime.now().isoformat(),
            'escalate': classification.get('escalate', False)
        }

    def classify_request(self, message: str) -> dict:
        """Classify support request type and urgency.

        Falls back to a neutral default classification when the model's
        reply is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model="llama-2-7b-chat",
            messages=[{
                "role": "system",
                "content": """Classify this support request. Return JSON with:
                - category: billing, technical, account, general
                - urgency: low, medium, high
                - urgent: true/false
                - escalate: true/false
                - sentiment: positive, neutral, negative"""
            }, {
                "role": "user",
                "content": f"Classify this support request: {message}"
            }],
            temperature=0.1,
            max_tokens=200
        )

        try:
            return json.loads(response.choices[0].message.content)
        except (json.JSONDecodeError, TypeError):
            # Narrowed from a bare `except:` so real bugs (and KeyboardInterrupt)
            # are not silently swallowed: JSONDecodeError covers malformed model
            # output, TypeError covers a None content field.
            return {
                'category': 'general',
                'urgency': 'medium',
                'urgent': False,
                'escalate': False,
                'sentiment': 'neutral'
            }

    def handle_urgent_request(self, message: str, context: dict) -> str:
        """Handle urgent support requests with the larger 13B model."""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "system",
                "content": """You are an expert customer support agent handling an urgent request.
                Provide immediate, actionable solutions. Be empathetic and professional.
                If you cannot resolve the issue, clearly explain next steps."""
            }, {
                "role": "user",
                "content": f"URGENT: {message}\nContext: {context}"
            }],
            temperature=0.2,
            max_tokens=500
        )
        return response.choices[0].message.content

    def handle_standard_request(self, message: str, context: dict) -> str:
        """Handle standard support requests with the 7B model."""
        response = self.client.chat.completions.create(
            model="llama-2-7b-chat",
            messages=[{
                "role": "system",
                "content": """You are a helpful customer support agent. Provide clear,
                helpful responses. Ask clarifying questions if needed. Be friendly and professional."""
            }, {
                "role": "user",
                "content": f"{message}\nContext: {context}"
            }],
            temperature=0.3,
            max_tokens=400
        )
        return response.choices[0].message.content

    def log_interaction(self, user_message: str, response: str, classification: dict):
        """Log support interaction for analysis."""
        interaction = {
            'timestamp': datetime.now().isoformat(),
            'user_message': user_message,
            'bot_response': response,
            'classification': classification
        }
        self.conversation_history.append(interaction)

        # In a real implementation, you'd save this to a database
        print(f"Logged interaction: {classification['category']} - {classification['urgency']}")

# Usage example
support_bot = SupportBot()

# Example urgent request (intended to be classified as urgent/escalated).
urgent_response = support_bot.handle_support_request(
    "My payment failed and I can't access my account! This is blocking my work!",
    {"user_id": "12345", "account_type": "premium"}
)

print("URGENT REQUEST:")
print(f"Response: {urgent_response['response']}")
print(f"Should escalate: {urgent_response['escalate']}")

# Example standard request (routine profile question).
standard_response = support_bot.handle_support_request(
    "How do I change my email address in my profile?",
    {"user_id": "67890", "account_type": "free"}
)

print("\nSTANDARD REQUEST:")
print(f"Response: {standard_response['response']}")

🌐 WebSocket Streaming Integration

Real-time Chat Interface

/**
 * Browser-side client that streams chat completions over a WebSocket.
 * Concurrent requests are demultiplexed by request id via `messageHandlers`.
 */
class InfernoStreamingClient {
    constructor(baseUrl = 'ws://localhost:8080') {
        this.baseUrl = baseUrl;
        this.ws = null;
        // request id -> { onToken, onComplete } for each in-flight request
        this.messageHandlers = new Map();
    }

    // Open the socket; resolves on `open`, rejects on the first error event.
    connect() {
        return new Promise((resolve, reject) => {
            this.ws = new WebSocket(`${this.baseUrl}/ws/stream`);

            this.ws.onopen = () => {
                console.log('Connected to Inferno WebSocket');
                resolve();
            };

            this.ws.onmessage = (event) => {
                try {
                    const data = JSON.parse(event.data);
                    this.handleMessage(data);
                } catch (error) {
                    // Malformed frames are logged and dropped, never thrown.
                    console.error('Failed to parse WebSocket message:', error);
                }
            };

            this.ws.onerror = (error) => {
                console.error('WebSocket error:', error);
                reject(error);
            };

            this.ws.onclose = () => {
                console.log('WebSocket connection closed');
            };
        });
    }

    // Send one streaming chat request; returns the request id used to route
    // incoming frames to the supplied callbacks.
    streamChat(message, model = 'llama-2-7b-chat', onToken, onComplete) {
        // NOTE(review): Date.now() can collide for requests issued within the
        // same millisecond; a counter or UUID would be safer.
        const requestId = Date.now().toString();

        // Set up handlers for this request
        this.messageHandlers.set(requestId, { onToken, onComplete });

        const request = {
            id: requestId,
            type: 'chat_completion',
            model: model,
            messages: [{ role: 'user', content: message }],
            stream: true,
            max_tokens: 500,
            temperature: 0.7
        };

        this.ws.send(JSON.stringify(request));

        return requestId;
    }

    // Route a server frame to its request's callbacks; handlers are removed
    // on error or completion so the map does not grow without bound.
    handleMessage(data) {
        const { id, type, content, done, error } = data;
        const handler = this.messageHandlers.get(id);

        if (!handler) return;

        if (error) {
            handler.onComplete(null, error);
            this.messageHandlers.delete(id);
        } else if (type === 'token' && content) {
            handler.onToken(content);
        } else if (done) {
            handler.onComplete(content);
            this.messageHandlers.delete(id);
        }
    }

    // Close the socket if it was ever opened.
    disconnect() {
        if (this.ws) {
            this.ws.close();
        }
    }
}

// Usage example - Real-time chat interface. Requires three DOM elements:
// #chat-container, #message-input, and #send-button.
async function createChatInterface() {
    const client = new InfernoStreamingClient();
    await client.connect();

    const chatContainer = document.getElementById('chat-container');
    const messageInput = document.getElementById('message-input');
    const sendButton = document.getElementById('send-button');

    sendButton.addEventListener('click', () => {
        const message = messageInput.value.trim();
        if (!message) return; // ignore empty submissions

        // Add user message to chat
        addMessageToChat('user', message);
        messageInput.value = '';

        // Create an empty response bubble, filled in token by token below.
        const responseElement = addMessageToChat('assistant', '');
        let fullResponse = '';

        // Stream AI response
        client.streamChat(
            message,
            'llama-2-7b-chat',
            (token) => {
                // Handle each token: append and keep the newest text in view.
                fullResponse += token;
                responseElement.textContent = fullResponse;
                chatContainer.scrollTop = chatContainer.scrollHeight;
            },
            (finalResponse, error) => {
                // Handle completion
                if (error) {
                    responseElement.textContent = `Error: ${error}`;
                    responseElement.classList.add('error');
                } else {
                    responseElement.textContent = fullResponse;
                }
            }
        );
    });

    // Append a message bubble and return it for later mutation.
    function addMessageToChat(role, content) {
        const messageDiv = document.createElement('div');
        messageDiv.className = `message ${role}`;
        messageDiv.textContent = content;
        chatContainer.appendChild(messageDiv);
        return messageDiv;
    }
}

// Initialize when page loads
document.addEventListener('DOMContentLoaded', createChatInterface);

🔄 Batch Processing Examples

Bulk Document Processing

import asyncio
import aiofiles
from pathlib import Path
import openai

class BatchDocumentProcessor:
    """Applies one LLM operation (summarize / translate / keywords) to every
    .txt/.md file in a directory, writing one output file per input."""

    def __init__(self, inferno_url: str = "http://localhost:8080/v1"):
        # NOTE(review): this is the synchronous OpenAI client, so the "async"
        # model-calling methods below block while each request runs; only the
        # file I/O (aiofiles) is truly asynchronous.
        self.client = openai.OpenAI(base_url=inferno_url, api_key="not-needed")

    async def process_directory(self, input_dir: str, output_dir: str, operation: str):
        """Process all text files in a directory.

        Args:
            input_dir: Scanned (non-recursively) for *.txt and *.md files.
            output_dir: Created if missing; receives one output per input.
            operation: "summarize", "translate_spanish", or "extract_keywords".

        Returns:
            A list of per-file status dicts, with raw Exception objects for
            files whose task raised (gather uses return_exceptions=True).
        """
        input_path = Path(input_dir)
        output_path = Path(output_dir)
        output_path.mkdir(exist_ok=True)

        # Find all text files
        text_files = list(input_path.glob("*.txt")) + list(input_path.glob("*.md"))

        # Process files concurrently (limit concurrency to avoid overwhelming)
        semaphore = asyncio.Semaphore(5)  # Max 5 concurrent requests

        tasks = [
            self.process_file_with_semaphore(semaphore, file_path, output_path, operation)
            for file_path in text_files
        ]

        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Summary report
        successful = sum(1 for r in results if not isinstance(r, Exception))
        failed = len(results) - successful

        print(f"Processing complete: {successful} successful, {failed} failed")
        return results

    async def process_file_with_semaphore(self, semaphore, file_path, output_path, operation):
        # Bound the number of files in flight at once.
        async with semaphore:
            return await self.process_single_file(file_path, output_path, operation)

    async def process_single_file(self, file_path: Path, output_path: Path, operation: str):
        """Process a single file; returns a status dict instead of raising."""
        try:
            # Read input file
            async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
                content = await f.read()

            # Process based on operation type
            if operation == "summarize":
                result = await self.summarize_text(content)
                suffix = "_summary"
            elif operation == "translate_spanish":
                result = await self.translate_text(content, "Spanish")
                suffix = "_spanish"
            elif operation == "extract_keywords":
                result = await self.extract_keywords(content)
                suffix = "_keywords"
            else:
                raise ValueError(f"Unknown operation: {operation}")

            # Write result next to the others, tagged with the operation suffix
            output_file = output_path / f"{file_path.stem}{suffix}.txt"
            async with aiofiles.open(output_file, 'w', encoding='utf-8') as f:
                await f.write(result)

            print(f"Processed: {file_path.name} -> {output_file.name}")
            return {"status": "success", "input": str(file_path), "output": str(output_file)}

        except Exception as e:
            # Per-file failures are reported, not propagated, so one bad file
            # cannot abort the whole batch.
            print(f"Error processing {file_path.name}: {str(e)}")
            return {"status": "error", "input": str(file_path), "error": str(e)}

    async def summarize_text(self, text: str) -> str:
        """Summarize text content"""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "user",
                "content": f"Provide a concise summary of this text:\n\n{text}"
            }],
            temperature=0.3,
            max_tokens=300
        )
        return response.choices[0].message.content

    async def translate_text(self, text: str, target_language: str) -> str:
        """Translate text to target language"""
        response = self.client.chat.completions.create(
            model="llama-2-13b-chat",
            messages=[{
                "role": "user",
                "content": f"Translate this text to {target_language}:\n\n{text}"
            }],
            temperature=0.2,
            # NOTE(review): evaluates to 0 for empty/whitespace-only input —
            # confirm the server accepts max_tokens=0 or guard upstream.
            max_tokens=len(text.split()) * 2  # Estimate tokens needed
        )
        return response.choices[0].message.content

    async def extract_keywords(self, text: str) -> str:
        """Extract keywords from text"""
        response = self.client.chat.completions.create(
            model="llama-2-7b-chat",
            messages=[{
                "role": "user",
                "content": f"Extract the 10 most important keywords from this text:\n\n{text}"
            }],
            temperature=0.1,
            max_tokens=200
        )
        return response.choices[0].message.content

# Usage example
async def main():
    """Summarize every document in ./documents into ./processed."""
    processor = BatchDocumentProcessor()

    # Process all documents in a directory
    results = await processor.process_directory(
        input_dir="./documents",
        output_dir="./processed",
        operation="summarize"
    )

    print("\nProcessing Results:")
    for result in results:
        if isinstance(result, dict):
            print(f"  {result['status']}: {result['input']}")
        else:
            # gather(return_exceptions=True) hands back raw Exception objects
            print(f"  Exception: {result}")

# Run the batch processor
asyncio.run(main())

πŸ” Authentication Examples

API Key Authentication

import openai
import os

# Setup with API key authentication: the key comes from the environment so
# it never lands in source control.
client = openai.OpenAI(
    base_url="http://localhost:8080/v1",
    api_key=os.getenv("INFERNO_API_KEY")  # Set your API key
)

try:
    response = client.chat.completions.create(
        model="llama-2-7b-chat",
        messages=[{"role": "user", "content": "Hello, authenticated world!"}]
    )
    print(response.choices[0].message.content)
except openai.AuthenticationError:
    # Raised when the server rejects the supplied key.
    print("Authentication failed. Check your API key.")
except Exception as e:
    print(f"Error: {e}")

JWT Token Authentication

import requests
import jwt
from datetime import datetime, timedelta

class InfernoAuthClient:
    """Client that logs in to Inferno's JWT endpoint and attaches the bearer
    token to subsequent chat-completion requests."""

    # Seconds to wait on any HTTP call. Without an explicit timeout,
    # `requests` blocks indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self, base_url: str, username: str, password: str):
        """Authenticate eagerly so ``self.token`` is ready for requests.

        Raises:
            Exception: if the login call does not return HTTP 200.
        """
        self.base_url = base_url
        self.token = None
        self.authenticate(username, password)

    def authenticate(self, username: str, password: str):
        """Authenticate and store the JWT token on ``self.token``."""
        auth_url = f"{self.base_url}/auth/login"

        response = requests.post(
            auth_url,
            json={"username": username, "password": password},
            timeout=self.REQUEST_TIMEOUT,  # fail fast instead of hanging forever
        )

        if response.status_code == 200:
            self.token = response.json()["token"]
            print("Authentication successful")
        else:
            raise Exception(f"Authentication failed: {response.text}")

    def make_authenticated_request(self, prompt: str, model: str = "llama-2-7b-chat"):
        """Make a chat-completion request carrying the JWT bearer token.

        Args:
            prompt: user message to send.
            model: model identifier to route the request to.

        Returns:
            The assistant's reply text.

        Raises:
            Exception: on 401 (token expired) or any other non-200 status.
        """
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 500
        }

        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,  # bound the wait on slow generations
        )

        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]
        elif response.status_code == 401:
            print("Token expired, re-authenticating...")
            # In a real app, you'd refresh the token here
            raise Exception("Authentication required")
        else:
            raise Exception(f"Request failed: {response.text}")

# Usage: constructing the client authenticates immediately (in __init__)
# and raises if the login endpoint rejects the credentials.
auth_client = InfernoAuthClient(
    "http://localhost:8080",
    username="your_username",
    password="your_password"
)

result = auth_client.make_authenticated_request(
    "What are the latest trends in AI?"
)
print(result)

πŸ“Š Error Handling Best Practices

import openai
import time
from typing import Optional
import logging

class RobustInfernoClient:
    """Inferno client with retry, exponential backoff, and response validation."""

    def __init__(self, base_url: str = "http://localhost:8080/v1", max_retries: int = 3):
        self.client = openai.OpenAI(base_url=base_url, api_key="not-needed")
        self.max_retries = max_retries
        self.logger = logging.getLogger(__name__)

    def chat_with_retry(self, prompt: str, model: str = "llama-2-7b-chat") -> Optional[str]:
        """Chat with automatic retry and error handling.

        Returns:
            The reply text, or None after exhausting retries or hitting a
            non-retryable error.
        """
        for attempt in range(self.max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=500,
                    temperature=0.7,
                    timeout=30  # 30 second timeout
                )

                return response.choices[0].message.content

            # NOTE: handler order matters. openai.APITimeoutError subclasses
            # APIConnectionError, which subclasses APIError, so the most
            # specific exceptions must be listed first. (The original code
            # caught APIError before APIConnectionError, which made the
            # connection-error branch unreachable.)
            except openai.APITimeoutError:
                self.logger.warning(f"Timeout on attempt {attempt + 1}")
                if attempt < self.max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff
                    continue
                self.logger.error("Max retries exceeded for timeout")
                return None

            except openai.APIConnectionError:
                self.logger.error(f"Connection error on attempt {attempt + 1}")
                if attempt < self.max_retries - 1:
                    time.sleep(2 ** attempt)
                    continue
                return None

            except openai.APIStatusError as e:
                # APIStatusError (not the APIError base class) carries the
                # HTTP status_code attribute used below.
                self.logger.error(f"API error on attempt {attempt + 1}: {e}")
                if e.status_code == 503:  # Service unavailable
                    if attempt < self.max_retries - 1:
                        time.sleep(5)  # Wait longer for service issues
                        continue
                return None

            except Exception as e:
                self.logger.error(f"Unexpected error: {e}")
                return None

        return None

    def validate_response(self, response: str) -> bool:
        """Validate AI response quality.

        A response is valid when it is non-empty, contains none of the
        common refusal/error phrases, and has at least 10 non-whitespace
        characters after trimming.
        """
        if not response:
            return False

        # Check for common error patterns
        error_patterns = [
            "I cannot",
            "I'm unable to",
            "Error:",
            "Sorry, I can't"
        ]

        response_lower = response.lower()
        if any(pattern.lower() in response_lower for pattern in error_patterns):
            return False

        # Check minimum length
        return len(response.strip()) >= 10

# Usage example with comprehensive error handling
def safe_ai_interaction(prompt: str) -> str:
    """Query Inferno with quality validation and graceful model fallback.

    Tries the larger model first, falls back to the smaller one, and
    returns a canned apology if neither produces a valid response.
    """
    inferno = RobustInfernoClient()

    def _valid_reply(model_name: str):
        # Fetch a reply and return it only if it passes validation.
        reply = inferno.chat_with_retry(prompt, model_name)
        if reply and inferno.validate_response(reply):
            return reply
        return None

    answer = _valid_reply("llama-2-13b-chat")
    if answer is not None:
        return answer

    answer and None  # no-op placeholder removed in favor of explicit flow
    print("Primary model failed, trying fallback...")
    answer = _valid_reply("llama-2-7b-chat")
    if answer is not None:
        return answer

    return "I apologize, but I'm currently unable to process your request. Please try again later."

# Example usage: prints either a validated model reply or the canned apology.
result = safe_ai_interaction("Explain the benefits of renewable energy")
print(result)

πŸš€ Performance Optimization Tips

Request Optimization

# Optimize token usage (assumes `client` and `prompt` were set up as in the
# earlier examples on this page).
response = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=[{"role": "user", "content": prompt}],
    max_tokens=100,  # Limit tokens for faster responses
    temperature=0.7,
    top_p=0.9,       # Nucleus sampling for efficiency
    frequency_penalty=0.1,  # Reduce repetition
    presence_penalty=0.1    # Encourage diversity
)

# Use shorter prompts when possible (`long_text` is any large input string).
short_prompt = "Summarize: " + long_text[:500]  # Truncate input

# Batch similar requests sequentially; each request reuses the same client.
prompts = ["Question 1", "Question 2", "Question 3"]
responses = []

for prompt in prompts:
    response = client.chat.completions.create(
        model="llama-2-7b-chat",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=50  # Short responses for batch processing
    )
    responses.append(response.choices[0].message.content)

Model Selection Strategy

def choose_optimal_model(task_type: str, complexity: str) -> str:
    """Pick the best model for a task.

    Looks up the (complexity, task_type) pair in a routing table; any
    unrecognized complexity level or task type falls back to the default
    7B chat model.
    """
    default_model = "llama-2-7b-chat"

    routing_table = {
        "simple": {
            "qa": "llama-2-7b-chat",
            "code": "codellama-7b",
            "creative": "llama-2-7b-chat"
        },
        "complex": {
            "qa": "llama-2-13b-chat",
            "code": "codellama-13b",
            "creative": "llama-2-13b-chat"
        },
        "expert": {
            "qa": "llama-2-70b-chat",
            "code": "codellama-34b",
            "creative": "llama-2-70b-chat"
        }
    }

    models_for_complexity = routing_table.get(complexity, {})
    return models_for_complexity.get(task_type, default_model)

# Usage: "code" task at "complex" level resolves to the 13B CodeLlama model.
model = choose_optimal_model("code", "complex")
print(f"Selected model: {model}")

API Examples & Recipes updated for Inferno v1.0.0. Need help with integration? Visit GitHub Discussions for community help!

Clone this wiki locally