#!/usr/bin/env python3
"""
Claude Code OpenAI API Wrapper - Advanced Streaming Example

This example demonstrates advanced streaming functionality including
error handling, chunk processing, and real-time display.
"""

import json
import os
import re
import sys
import time
from typing import Callable, Generator, Optional

import requests
from openai import OpenAI

# Model used by every demo unless the caller passes another one.
DEFAULT_MODEL = "claude-3-5-sonnet-20241022"

# Upper bound for the auth-status probe so an unresponsive server cannot
# hang the script before any demo starts.
AUTH_CHECK_TIMEOUT_SECONDS = 5

# A chunk containing any of these characters flushes the sentence buffer
# in StreamingClient.stream_with_processing.
SENTENCE_BREAKS = (".", "!", "?", "\n")


def get_api_key(base_url: str = "http://localhost:8000") -> Optional[str]:
    """Get the appropriate API key based on server configuration.

    Args:
        base_url: Server root URL (without the ``/v1`` suffix).

    Returns:
        The ``API_KEY`` environment variable when it is set and non-empty;
        ``"no-auth-required"`` when the server reports that no key is
        needed; ``None`` when the server requires a key and none was
        provided; ``"fallback-key"`` when the status endpoint answers with
        a non-200 status or cannot be queried at all.
    """
    # A key supplied by the user always wins over auto-detection.
    env_key = os.getenv("API_KEY")
    if env_key:
        return env_key

    # Ask the server whether authentication is needed.
    try:
        response = requests.get(
            f"{base_url}/v1/auth/status",
            timeout=AUTH_CHECK_TIMEOUT_SECONDS,
        )
        if response.status_code == 200:
            auth_data = response.json()
            server_info = auth_data.get("server_info", {})

            if not server_info.get("api_key_required", False):
                # No auth required
                return "no-auth-required"

            # Auth required but no key provided
            print("⚠️ Server requires API key but none provided.")
            print(" Set API_KEY environment variable with your server's API key")
            print(" Example: API_KEY=your-server-key python streaming.py")
            return None
    except Exception as e:
        # Deliberately broad: the probe is best-effort, so any failure
        # (connection error, timeout, malformed JSON) falls back to a
        # placeholder key instead of aborting the example.
        print(f"⚠️ Could not check server auth status: {e}")
        print(" Assuming no authentication required")

    return "fallback-key"


def _server_root(base_url: str) -> str:
    """Return ``base_url`` without trailing slashes or a trailing ``/v1``.

    Only the final path segment is stripped, so a ``/v1`` that appears
    elsewhere in the URL (for example in the host name) is left intact.
    """
    root = base_url.rstrip("/")
    if root.endswith("/v1"):
        root = root[: -len("/v1")]
    return root


def _first_choice(chunk):
    """Return the first choice of a stream chunk, or ``None`` if it has none.

    Streams may contain chunks whose ``choices`` list is empty (for example
    a usage-only chunk), so indexing ``choices[0]`` directly is unsafe.
    """
    choices = getattr(chunk, "choices", None)
    return choices[0] if choices else None


def _chunk_text(chunk) -> str:
    """Return the text carried by a stream chunk, or ``""`` if it has none."""
    choice = _first_choice(chunk)
    if choice is None or choice.delta is None:
        return ""
    return choice.delta.content or ""


class StreamingClient:
    """Client for handling streaming responses."""

    def __init__(
        self,
        base_url: str = "http://localhost:8000/v1",
        api_key: Optional[str] = None,
    ):
        """Create the underlying OpenAI client.

        Args:
            base_url: API base URL, normally ending in ``/v1``.
            api_key: Key to send; when ``None`` it is detected with
                :func:`get_api_key`.

        Raises:
            ValueError: If the server requires a key and none is available.
        """
        if api_key is None:
            # Auto-detect API key based on server configuration
            api_key = get_api_key(_server_root(base_url))

            if api_key is None:
                raise ValueError(
                    "Server requires API key but none was provided. "
                    "Set the API_KEY environment variable."
                )

        self.client = OpenAI(base_url=base_url, api_key=api_key)

    def stream_with_timing(self, messages: list, model: str = DEFAULT_MODEL):
        """Stream response with timing information.

        Prints the content as it arrives, the time to the first content
        chunk, and a summary once a chunk carries a finish reason.
        Keyboard interrupts and errors are reported, not raised.

        Args:
            messages: Chat messages to send.
            model: Model name to request.
        """
        # perf_counter is monotonic, so the intervals cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()
        first_token_time = None
        token_count = 0

        print("Streaming response...")
        print("-" * 50)

        try:
            stream = self.client.chat.completions.create(
                model=model,
                messages=messages,
                stream=True,
            )

            for chunk in stream:
                choice = _first_choice(chunk)
                if choice is None:
                    # Nothing to display or time in a choice-less chunk.
                    continue

                content = _chunk_text(chunk)
                if content:
                    if first_token_time is None:
                        first_token_time = time.perf_counter()
                        time_to_first_token = first_token_time - start_time
                        print(f"[Time to first token: {time_to_first_token:.2f}s]\n")

                    print(content, end="", flush=True)
                    # Counts content chunks, which only approximates tokens.
                    token_count += 1

                if choice.finish_reason:
                    total_time = time.perf_counter() - start_time
                    print("\n\n[Streaming completed]")
                    print(f"[Total time: {total_time:.2f}s]")
                    print(f"[Approximate tokens: {token_count}]")
                    print(f"[Finish reason: {choice.finish_reason}]")

        except KeyboardInterrupt:
            print("\n\n[Streaming interrupted by user]")
        except Exception as e:
            print(f"\n\n[Streaming error: {e}]")

    def stream_with_processing(
        self,
        messages: list,
        process_func: Optional[Callable[[str], str]] = None,
        model: str = DEFAULT_MODEL,
    ) -> Generator[str, None, None]:
        """Stream response with custom processing function.

        Content is buffered until a chunk contains sentence-ending
        punctuation or a newline; the buffer is then passed through
        ``process_func`` and yielded. Leftover text is flushed at the end.

        Args:
            messages: Chat messages to send.
            process_func: Transformation applied to each buffered segment;
                ``None`` yields the text unchanged.
            model: Model name to request.

        Yields:
            Processed text segments in arrival order.
        """
        stream = self.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        )

        buffer = ""
        for chunk in stream:
            content = _chunk_text(chunk)
            if not content:
                continue

            buffer += content

            # Process complete sentences
            if any(mark in content for mark in SENTENCE_BREAKS):
                yield buffer if process_func is None else process_func(buffer)
                buffer = ""

        # Process remaining buffer
        if buffer:
            yield buffer if process_func is None else process_func(buffer)

    def parallel_streams(self, prompts: list):
        """Demo of handling multiple prompts (sequential, not truly parallel).

        Args:
            prompts: User prompts, each sent as its own single-message chat.
        """
        for i, prompt in enumerate(prompts):
            print(f"\n{'='*50}")
            print(f"Prompt {i+1}: {prompt}")
            print("=" * 50)

            messages = [{"role": "user", "content": prompt}]
            self.stream_with_timing(messages)
            print()


def typing_effect_demo():
    """Demonstrate a typing effect with streaming."""
    client = StreamingClient()

    print("=== Typing Effect Demo ===")
    messages = [
        {"role": "system", "content": "You are a storyteller."},
        {
            "role": "user",
            "content": "Tell me a very short story (2-3 sentences) about a robot learning to paint.",
        },
    ]

    stream = client.client.chat.completions.create(
        model=DEFAULT_MODEL,
        messages=messages,
        stream=True,
    )

    for chunk in stream:
        for char in _chunk_text(chunk):
            print(char, end="", flush=True)
            time.sleep(0.05)  # Typing delay

    print("\n")


def word_highlighting_demo():
    """Demonstrate processing stream to highlight specific words."""
    client = StreamingClient()

    print("=== Word Highlighting Demo ===")
    print("(Technical terms will be CAPITALIZED)")

    technical_terms = [
        "python", "javascript", "api", "function", "variable",
        "class", "method", "algorithm", "data", "code",
    ]
    # Whole-word, case-insensitive match with an optional plural suffix.
    # A plain substring replace would also rewrite unrelated words that
    # merely contain a term ("capital" -> "cAPItal").
    term_pattern = re.compile(
        r"\b(" + "|".join(re.escape(term) for term in technical_terms) + r")(es|s)?\b",
        re.IGNORECASE,
    )

    def highlight_technical_terms(text: str) -> str:
        """Highlight technical terms by capitalizing them."""
        return term_pattern.sub(
            lambda match: match.group(1).upper() + (match.group(2) or ""),
            text,
        )

    messages = [
        {"role": "user", "content": "Explain what an API is in simple terms."}
    ]

    for processed_chunk in client.stream_with_processing(messages, highlight_technical_terms):
        print(processed_chunk, end="", flush=True)

    print("\n")


def progress_bar_demo():
    """Demonstrate a progress bar with streaming (estimated)."""
    client = StreamingClient()

    print("=== Progress Bar Demo ===")
    messages = [
        {
            "role": "user",
            "content": "Count from 1 to 10, with a brief pause between each number.",
        }
    ]

    # This is a simple demo - real progress would need token counting
    stream = client.client.chat.completions.create(
        model=DEFAULT_MODEL,
        messages=messages,
        stream=True,
    )

    print("Response: ", end="", flush=True)

    for chunk in stream:
        content = _chunk_text(chunk)
        if content:
            print(content, end="", flush=True)

    print("\n")


def error_recovery_demo():
    """Demonstrate error handling in streaming.

    Requests a model name that may not exist and, on any error, retries
    once with the default model. A failed retry is reported, not raised.
    """
    client = StreamingClient()

    print("=== Error Recovery Demo ===")

    messages = [{"role": "user", "content": "Hello!"}]

    def print_stream(model: str) -> None:
        """Request a streamed completion and echo its content."""
        stream = client.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            content = _chunk_text(chunk)
            if content:
                print(content, end="", flush=True)

    try:
        # This might cause an error if the model doesn't exist
        print_stream("non-existent-model")
    except Exception as e:
        print(f"Error encountered: {e}")
        print("Retrying with valid model...")

        # Retry with valid model; a second failure must not abort the
        # remaining demos.
        try:
            print_stream(DEFAULT_MODEL)
        except Exception as retry_error:
            print(f"\nRetry failed: {retry_error}")

    print("\n")


def main():
    """Run all streaming demos."""
    try:
        client = StreamingClient()
    except ValueError as e:
        # get_api_key has already printed setup instructions; exit without
        # a traceback.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Basic streaming with timing
    print("=== Basic Streaming with Timing ===")
    client.stream_with_timing([
        {"role": "user", "content": "Write a one-line Python function to reverse a string."}
    ])

    print("\n" + "=" * 70 + "\n")

    # Run other demos
    typing_effect_demo()
    print("=" * 70 + "\n")

    word_highlighting_demo()
    print("=" * 70 + "\n")

    progress_bar_demo()
    print("=" * 70 + "\n")

    error_recovery_demo()
    print("=" * 70 + "\n")

    # Multiple prompts
    print("=== Multiple Prompts Demo ===")
    client.parallel_streams([
        "What is 2+2?",
        "Name a color.",
        "Say 'Hello, World!' in Python.",
    ])


if __name__ == "__main__":
    main()