import { LMStudioClient } from "@lmstudio/sdk";
import { Ollama } from "ollama/browser";

/**
 * Discover models from an Ollama server.
 * @param baseUrl - Base URL of the Ollama server (e.g., "http://localhost:11434")
 * @param apiKey - Optional API key (currently unused by Ollama)
 * @returns Array of discovered models
 */
export async function discoverOllamaModels(baseUrl, _apiKey) {
  try {
    // Create Ollama client
    const ollama = new Ollama({ host: baseUrl });
    // Get list of available models
    const { models } = await ollama.list();
    // Fetch details for each model and convert to Model format
    const ollamaModelPromises = models.map(async (model) => {
      try {
        // Get model details
        const details = await ollama.show({
          model: model.name,
        });
        // Check capabilities - filter out models that don't support tools
        const capabilities = details.capabilities || [];
        if (!capabilities.includes("tools")) {
          console.debug(`Skipping model ${model.name}: does not support tools`);
          return null;
        }
        // Extract model info
        const modelInfo = details.model_info || {};
        // Get context window size - look for architecture-specific keys
        const architecture = modelInfo["general.architecture"] || "";
        const contextKey = `${architecture}.context_length`;
        const contextWindow = parseInt(modelInfo[contextKey] || "8192", 10);
        // Ollama caps max tokens at 10x context length
        const maxTokens = contextWindow * 10;
        // Ollama only supports the completions API
        const ollamaModel = {
          id: model.name,
          name: model.name,
          api: "openai-completions",
          provider: "", // Will be set by caller
          baseUrl: `${baseUrl}/v1`,
          reasoning: capabilities.includes("thinking"),
          input: ["text"],
          cost: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
          },
          contextWindow: contextWindow,
          maxTokens: maxTokens,
        };
        return ollamaModel;
      } catch (err) {
        console.error(`Failed to fetch details for model ${model.name}:`, err);
        return null;
      }
    });
    const results = await Promise.all(ollamaModelPromises);
    return results.filter((m) => m !== null);
  } catch (err) {
    console.error("Failed to discover Ollama models:", err);
    throw new Error(`Ollama discovery failed: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Discover models from a llama.cpp server via the OpenAI-compatible /v1/models endpoint.
 * @param baseUrl - Base URL of the llama.cpp server (e.g., "http://localhost:8080")
 * @param apiKey - Optional API key
 * @returns Array of discovered models
 */
export async function discoverLlamaCppModels(baseUrl, apiKey) {
  try {
    const headers = {
      "Content-Type": "application/json",
    };
    if (apiKey) {
      headers.Authorization = `Bearer ${apiKey}`;
    }
    const response = await fetch(`${baseUrl}/v1/models`, {
      method: "GET",
      headers,
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const data = await response.json();
    if (!data.data || !Array.isArray(data.data)) {
      throw new Error("Invalid response format from llama.cpp server");
    }
    return data.data.map((model) => {
      // llama.cpp doesn't always provide context window info
      const contextWindow = model.context_length || 8192;
      const maxTokens = model.max_tokens || 4096;
      const llamaModel = {
        id: model.id,
        name: model.id,
        api: "openai-completions",
        provider: "", // Will be set by caller
        baseUrl: `${baseUrl}/v1`,
        reasoning: false,
        input: ["text"],
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: contextWindow,
        maxTokens: maxTokens,
      };
      return llamaModel;
    });
  } catch (err) {
    console.error("Failed to discover llama.cpp models:", err);
    throw new Error(`llama.cpp discovery failed: ${err instanceof Error ? err.message : String(err)}`);
  }
}
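/*
 * Usage sketch for the discovery functions above. Illustrative only and not
 * executed by this module; the module path, host, and port below are
 * assumptions about a local llama.cpp deployment and should be adjusted.
 *
 *   import { discoverLlamaCppModels } from "./model-discovery.js";
 *
 *   const models = await discoverLlamaCppModels("http://localhost:8080");
 *   console.log(models.map((m) => `${m.id} (ctx: ${m.contextWindow})`));
 */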
/**
 * Discover models from a vLLM server via the OpenAI-compatible /v1/models endpoint.
 * @param baseUrl - Base URL of the vLLM server (e.g., "http://localhost:8000")
 * @param apiKey - Optional API key
 * @returns Array of discovered models
 */
export async function discoverVLLMModels(baseUrl, apiKey) {
  try {
    const headers = {
      "Content-Type": "application/json",
    };
    if (apiKey) {
      headers.Authorization = `Bearer ${apiKey}`;
    }
    const response = await fetch(`${baseUrl}/v1/models`, {
      method: "GET",
      headers,
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const data = await response.json();
    if (!data.data || !Array.isArray(data.data)) {
      throw new Error("Invalid response format from vLLM server");
    }
    return data.data.map((model) => {
      // vLLM provides max_model_len, which is the context window
      const contextWindow = model.max_model_len || 8192;
      const maxTokens = Math.min(contextWindow, 4096); // Cap max tokens
      const vllmModel = {
        id: model.id,
        name: model.id,
        api: "openai-completions",
        provider: "", // Will be set by caller
        baseUrl: `${baseUrl}/v1`,
        reasoning: false,
        input: ["text"],
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: contextWindow,
        maxTokens: maxTokens,
      };
      return vllmModel;
    });
  } catch (err) {
    console.error("Failed to discover vLLM models:", err);
    throw new Error(`vLLM discovery failed: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Discover models from an LM Studio server using the LM Studio SDK.
 * @param baseUrl - Base URL of the LM Studio server (e.g., "http://localhost:1234")
 * @param apiKey - Optional API key (unused for LM Studio SDK)
 * @returns Array of discovered models
 */
export async function discoverLMStudioModels(baseUrl, _apiKey) {
  try {
    // Extract host and port from baseUrl
    const url = new URL(baseUrl);
    const port = url.port ? parseInt(url.port, 10) : 1234;
    // Create LM Studio client
    const client = new LMStudioClient({ baseUrl: `ws://${url.hostname}:${port}` });
    // List all downloaded models
    const models = await client.system.listDownloadedModels();
    // Filter to only LLM models and map to our Model format
    return models
      .filter((model) => model.type === "llm")
      .map((model) => {
        const contextWindow = model.maxContextLength;
        // Allow generating up to the full context length
        const maxTokens = contextWindow;
        const lmStudioModel = {
          id: model.path,
          name: model.displayName || model.path,
          api: "openai-completions",
          provider: "", // Will be set by caller
          baseUrl: `${baseUrl}/v1`,
          reasoning: model.trainedForToolUse || false,
          input: model.vision ? ["text", "image"] : ["text"],
          cost: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
          },
          contextWindow: contextWindow,
          maxTokens: maxTokens,
        };
        return lmStudioModel;
      });
  } catch (err) {
    console.error("Failed to discover LM Studio models:", err);
    throw new Error(`LM Studio discovery failed: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Convenience function to discover models based on provider type.
 * @param type - Provider type
 * @param baseUrl - Base URL of the server
 * @param apiKey - Optional API key
 * @returns Array of discovered models
 */
export async function discoverModels(type, baseUrl, apiKey) {
  switch (type) {
    case "ollama":
      return discoverOllamaModels(baseUrl, apiKey);
    case "llama.cpp":
      return discoverLlamaCppModels(baseUrl, apiKey);
    case "vllm":
      return discoverVLLMModels(baseUrl, apiKey);
    case "lmstudio":
      return discoverLMStudioModels(baseUrl, apiKey);
    default:
      throw new Error(`Unknown provider type: ${type}`);
  }
}
//# sourceMappingURL=model-discovery.js.map
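/*
 * Usage sketch for discoverModels. Illustrative only and not executed by this
 * module; the provider type and URL below are assumptions (the type string
 * must match one of the switch cases above).
 *
 *   import { discoverModels } from "./model-discovery.js";
 *
 *   const models = await discoverModels("ollama", "http://localhost:11434");
 *   for (const model of models) {
 *     console.log(model.id, model.contextWindow, model.maxTokens);
 *   }
 */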