# llm_handler.py (Refactored)
import json
import os
import requests
from typing import Iterator, Optional

# A simple registry to define providers and their models.
# Note: LLMHandler below talks to OpenRouter only; this registry documents
# known direct-provider endpoints and is not consumed by the class itself.
PROVIDER_CONFIG = {
    "anthropic": {
        "models": {
            "claude-3-5-sonnet-20241022": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
            "claude-3-haiku-20240307": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
        },
    },
    "openrouter": {
        "models": {
            "anthropic/claude-3-opus-20240229": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
            "google/gemini-2.0-flash-exp": {"provider": "google", "api_url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent"},
        }
    },
    "huggingface": {
        "models": {
            "mistralai/Mixtral-8x7B": {"provider": "huggingface", "api_url": "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B"},
            "meta-llama/Meta-Llama-3.1-8B-Instruct": {"provider": "huggingface", "api_url": "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct"},
        }
    }
}


class LLMHandler:
    """
    Handle LLM interactions via OpenRouter.

    - Uses OPENROUTER_API_KEY (required).
    - Default model from PREFERRED_MODEL or google/gemini-2.0-flash-exp.
    - Supports dynamic override from the UI (model_override).
    """

    def __init__(self, model_override: str | None = None):
        self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if not self.openrouter_key:
            raise ValueError(
                "OPENROUTER_API_KEY is not set. Configure it in your Space secrets."
            )
        default_model = os.getenv("PREFERRED_MODEL", "google/gemini-2.0-flash-exp")
        self.model_id = model_override or default_model
        # Optional credentials for the legacy direct-provider helpers below.
        # These env var names are the conventional ones for each service;
        # adjust if your deployment uses different names.
        self.anthropic_key = os.getenv("ANTHROPIC_API_KEY")
        self.hf_token = os.getenv("HF_TOKEN")

    def set_model(self, model_name: str):
        """Update the active model at runtime."""
        if model_name:
            self.model_id = model_name

    def generate_streaming(self, prompt: str, model: Optional[str] = None) -> Iterator[str]:
        """Generate a streaming response using OpenRouter chat completions."""
        model_to_use = model or self.model_id
        print(f"[LLMHandler] Using OpenRouter model: {model_to_use}")
        try:
            yield from self._call_openrouter_streaming(prompt, model_to_use)
        except Exception as e:
            error_msg = f"Error during generation with OpenRouter: {e}"
            print(error_msg)
            yield error_msg

    def _call_anthropic_streaming(self, prompt: str, api_url: str) -> Iterator[str]:
        """Legacy direct-Anthropic SSE path; requires self.anthropic_key."""
        headers = {
            "x-api-key": self.anthropic_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }
        data = {
            "model": self.model_id,
            "max_tokens": 2000,
            "stream": True,
            "messages": [{"role": "user", "content": prompt}],
        }
        response = requests.post(api_url, headers=headers, json=data, stream=True, timeout=60)
        response.raise_for_status()
        for line in response.iter_lines():
            line = line.decode("utf-8")
            if line.startswith("data: "):
                line = line[6:]
                if line == "[DONE]":
                    break
                try:
                    chunk = json.loads(line)
                    if chunk.get("type") == "content_block_delta" and chunk.get("delta", {}).get("text"):
                        yield chunk["delta"]["text"]
                except json.JSONDecodeError:
                    continue

    def _call_huggingface_streaming(self, prompt: str, api_url: str) -> Iterator[str]:
        """Legacy direct-Hugging-Face path; requires self.hf_token."""
        headers = {"Authorization": f"Bearer {self.hf_token}", "Content-Type": "application/json"}
        data = {"inputs": prompt, "parameters": {"max_new_tokens": 2000, "stream": True}}
        response = requests.post(api_url, headers=headers, json=data, stream=True, timeout=60)
        if response.status_code == 503:  # Model loading
            yield "Model is loading, please try again in a few moments..."
            return
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                try:
                    chunk = json.loads(line.decode("utf-8"))
                    if "token" in chunk:
                        text = chunk.get("token", {}).get("text", "")
                        if text:
                            yield text
                except json.JSONDecodeError:
                    continue

    def _call_openrouter_streaming(self, prompt: str, model_id: str) -> Iterator[str]:
        """Stream completions from OpenRouter using OpenAI-compatible SSE."""
        api_url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.openrouter_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://huggingface.co/spaces/jblast94/my-voice-agent",
            "X-Title": "my-voice-agent",
        }
        data = {
            "model": model_id,
            "stream": True,
            "messages": [
                {"role": "system", "content": "You are a helpful, friendly AI assistant."},
                {"role": "user", "content": prompt},
            ],
        }
        with requests.post(api_url, headers=headers, json=data, stream=True, timeout=60) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                if raw_line.startswith(b"data: "):
                    payload = raw_line[6:]
                    if payload == b"[DONE]":
                        break
                    try:
                        chunk = json.loads(payload.decode("utf-8"))
                    except Exception:
                        continue
                    choices = chunk.get("choices") or []
                    if not choices:
                        continue
                    delta = choices[0].get("delta") or {}
                    content_piece = delta.get("content")
                    if content_piece:
                        yield content_piece

    def get_provider_info(self) -> dict:
        """Get information about the current provider configuration."""
        return {
            "provider": "openrouter",
            "model": self.model_id,
            "requires": ["OPENROUTER_API_KEY"],
        }
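
# --- Usage sketch ---
# A minimal, illustrative example of driving the handler from a script,
# assuming OPENROUTER_API_KEY is set in the environment. The prompt text and
# the per-chunk printing are placeholders, not part of the handler's contract.
if __name__ == "__main__":
    handler = LLMHandler()
    print(handler.get_provider_info())
    # generate_streaming yields text chunks as they arrive over SSE.
    for piece in handler.generate_streaming("Say hello in one short sentence."):
        print(piece, end="", flush=True)
    print()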