diff --git a/.gitignore b/.gitignore
index e93f42f..e9e6d77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,11 @@ ENV/
 *.sqlite
 *.sqlite3
 
+# Environment variables
+.env
+.env.local
+.env.*.local
+
 # Logs
 *.log
diff --git a/backend/.env.example b/backend/.env.example
new file mode 100644
index 0000000..9428102
--- /dev/null
+++ b/backend/.env.example
@@ -0,0 +1,51 @@
+# LLM Configuration for The Island Backend
+# Copy this file to .env and fill in your values
+
+# =============================================================================
+# Option 1: OpenAI (default)
+# =============================================================================
+# OPENAI_API_KEY=sk-xxx
+# LLM_MODEL=gpt-3.5-turbo
+
+# =============================================================================
+# Option 2: Anthropic Claude
+# =============================================================================
+# ANTHROPIC_API_KEY=sk-ant-xxx
+# LLM_MODEL=claude-3-haiku-20240307
+
+# =============================================================================
+# Option 3: Google Gemini
+# =============================================================================
+# GEMINI_API_KEY=xxx
+# LLM_MODEL=gemini/gemini-pro
+
+# =============================================================================
+# Option 4: Azure OpenAI
+# =============================================================================
+# AZURE_API_KEY=xxx
+# AZURE_API_BASE=https://your-resource.openai.azure.com
+# LLM_MODEL=azure/your-deployment-name
+
+# =============================================================================
+# Option 5: OpenRouter (access multiple models)
+# =============================================================================
+# OPENROUTER_API_KEY=sk-or-xxx
+# LLM_MODEL=openrouter/anthropic/claude-3-haiku
+
+# =============================================================================
+# Option 6: Local Ollama
+# =============================================================================
+# OLLAMA_API_BASE=http://localhost:11434
+# LLM_MODEL=ollama/llama2
+
+# =============================================================================
+# Option 7: Custom/Self-hosted (OpenAI-compatible endpoint)
+# =============================================================================
+# LLM_API_BASE=http://localhost:8000/v1
+# LLM_API_KEY=your-key
+# LLM_MODEL=qwen2.5
+
+# =============================================================================
+# Force mock mode (no API calls, uses predefined responses)
+# =============================================================================
+# LLM_MOCK_MODE=true
diff --git a/backend/app/llm.py b/backend/app/llm.py
index 133e5f9..f738cb7 100644
--- a/backend/app/llm.py
+++ b/backend/app/llm.py
@@ -1,6 +1,22 @@
 """
 LLM Service - Agent Brain Module.
-Provides AI-powered responses for agents using OpenAI's API.
+Provides AI-powered responses for agents using LiteLLM (supports multiple providers).
+
+Supported providers (via environment variables):
+- OpenAI: OPENAI_API_KEY → model="gpt-3.5-turbo" or "gpt-4"
+- Anthropic: ANTHROPIC_API_KEY → model="claude-3-haiku-20240307" or "claude-3-sonnet-20240229"
+- Google: GEMINI_API_KEY → model="gemini/gemini-pro"
+- Azure OpenAI: AZURE_API_KEY + AZURE_API_BASE → model="azure/<your-deployment-name>"
+- OpenRouter: OPENROUTER_API_KEY → model="openrouter/<provider>/<model>"
+- Ollama (local): OLLAMA_API_BASE → model="ollama/llama2"
+- Custom/Self-hosted: LLM_API_KEY + LLM_API_BASE → any OpenAI-compatible endpoint
+- And 100+ more providers...
+
+Configuration:
+- LLM_MODEL: Model to use (default: gpt-3.5-turbo)
+- LLM_API_BASE: Custom API base URL (for self-hosted or proxy services)
+- LLM_API_KEY: Generic API key (used with LLM_API_BASE)
+- LLM_MOCK_MODE: Set to "true" to force mock mode
 """
 
 import logging
@@ -41,42 +57,90 @@ MOCK_REACTIONS = {
     ],
 }
 
+# Default model configuration
+DEFAULT_MODEL = "gpt-3.5-turbo"
+
 
 class LLMService:
     """
-    Service for generating AI-powered agent reactions.
-    Falls back to mock responses if API key is not configured.
+    Service for generating AI-powered agent reactions using LiteLLM.
+    Supports multiple LLM providers through a unified interface.
+    Falls back to mock responses if no API key is configured.
     """
 
     def __init__(self) -> None:
-        """Initialize the LLM service with OpenAI client or mock mode."""
-        self._api_key = os.environ.get("OPENAI_API_KEY")
-        self._client = None
-        self._mock_mode = False
+        """Initialize the LLM service with LiteLLM or mock mode."""
+        self._model = os.environ.get("LLM_MODEL", DEFAULT_MODEL)
+        self._api_base = os.environ.get("LLM_API_BASE")  # Custom base URL
+        self._api_key = os.environ.get("LLM_API_KEY")  # Generic API key
+        self._mock_mode = os.environ.get("LLM_MOCK_MODE", "").lower() == "true"
+        self._acompletion = None
 
-        if not self._api_key:
+        if self._mock_mode:
+            logger.info("LLMService running in MOCK mode (forced by LLM_MOCK_MODE)")
+            return
+
+        # Check for any supported API key (order matters for provider detection)
+        api_keys = {
+            "OPENAI_API_KEY": "OpenAI",
+            "ANTHROPIC_API_KEY": "Anthropic",
+            "GEMINI_API_KEY": "Google Gemini",
+            "AZURE_API_KEY": "Azure OpenAI",
+            "AZURE_API_BASE": "Azure OpenAI",
+            "OPENROUTER_API_KEY": "OpenRouter",
+            "COHERE_API_KEY": "Cohere",
+            "HUGGINGFACE_API_KEY": "HuggingFace",
+            "OLLAMA_API_BASE": "Ollama (local)",
+            "LLM_API_KEY": "Custom (with LLM_API_BASE)",
+            "LLM_API_BASE": "Custom endpoint",
+        }
+
+        found_provider = None
+        for key, provider in api_keys.items():
+            if os.environ.get(key):
+                found_provider = provider
+                break
+
+        if not found_provider:
             logger.warning(
-                "OPENAI_API_KEY not found in environment. "
-                "LLMService running in MOCK mode - using predefined responses."
+                "No LLM API key found in environment. "
+                "LLMService running in MOCK mode - using predefined responses. "
+                f"Supported keys: {', '.join(api_keys.keys())}"
             )
             self._mock_mode = True
-        else:
-            try:
-                from openai import AsyncOpenAI
-                self._client = AsyncOpenAI(api_key=self._api_key)
-                logger.info("LLMService initialized with OpenAI API")
-            except ImportError:
-                logger.error("openai package not installed. Running in MOCK mode.")
-                self._mock_mode = True
-            except Exception as e:
-                logger.error(f"Failed to initialize OpenAI client: {e}. Running in MOCK mode.")
-                self._mock_mode = True
+            return
+
+        try:
+            from litellm import acompletion
+            self._acompletion = acompletion
+
+            # Log configuration details
+            config_info = f"provider: {found_provider}, model: {self._model}"
+            if self._api_base:
+                config_info += f", api_base: {self._api_base}"
+            logger.info(f"LLMService initialized with LiteLLM ({config_info})")
+        except ImportError:
+            logger.error("litellm package not installed. Running in MOCK mode.")
+            self._mock_mode = True
+        except Exception as e:
+            logger.error(f"Failed to initialize LiteLLM: {e}. Running in MOCK mode.")
+            self._mock_mode = True
 
     @property
     def is_mock_mode(self) -> bool:
         """Check if service is running in mock mode."""
         return self._mock_mode
 
+    @property
+    def model(self) -> str:
+        """Get the current model name."""
+        return self._model
+
+    @property
+    def api_base(self) -> str | None:
+        """Get the custom API base URL if configured."""
+        return self._api_base
+
     def _get_mock_response(self, event_type: str = "feed") -> str:
         """Get a random mock response for testing without API."""
         responses = MOCK_REACTIONS.get(event_type, MOCK_REACTIONS["feed"])
@@ -112,15 +176,22 @@ class LLMService:
             f"Respond in first person, as if speaking out loud."
         )
 
-        response = await self._client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
+        # Build kwargs for acompletion
+        kwargs = {
+            "model": self._model,
+            "messages": [
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": event_description}
             ],
-            max_tokens=50,
-            temperature=0.8,
-        )
+            "max_tokens": 50,
+            "temperature": 0.8,
+        }
+        if self._api_base:
+            kwargs["api_base"] = self._api_base
+        if self._api_key:
+            kwargs["api_key"] = self._api_key
+
+        response = await self._acompletion(**kwargs)
 
         return response.choices[0].message.content.strip()
@@ -165,15 +236,22 @@ class LLMService:
             f"Speak naturally, as if talking to yourself or nearby survivors."
         )
 
-        response = await self._client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
+        # Build kwargs for acompletion
+        kwargs = {
+            "model": self._model,
+            "messages": [
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": "What are you thinking right now?"}
            ],
-            max_tokens=40,
-            temperature=0.9,
-        )
+            "max_tokens": 40,
+            "temperature": 0.9,
+        }
+        if self._api_base:
+            kwargs["api_base"] = self._api_base
+        if self._api_key:
+            kwargs["api_key"] = self._api_key
+
+        response = await self._acompletion(**kwargs)
 
         return response.choices[0].message.content.strip()
diff --git a/backend/app/main.py b/backend/app/main.py
index 496da24..04c175a 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -3,6 +3,10 @@ FastAPI entry point for the interactive live-stream game backend.
 Configures the application, WebSocket routes, and lifecycle events.
 """
 
+# Load .env file before any other imports
+from dotenv import load_dotenv
+load_dotenv()
+
 import logging
 from contextlib import asynccontextmanager
 from pathlib import Path
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 87131e9..42ef0a8 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -4,4 +4,5 @@ websockets>=12.0
 pydantic>=2.5.0
 sqlalchemy>=2.0.0
 aiosqlite>=0.19.0
-openai>=1.0.0
+litellm>=1.40.0
+python-dotenv>=1.0.0