feat: add web tools

This commit is contained in:
Peter Steinberger
2026-01-15 04:07:29 +00:00
parent 31d3aef8d6
commit f275cc180b
18 changed files with 736 additions and 165 deletions

View File

@@ -17,6 +17,7 @@ import { createSessionsHistoryTool } from "./tools/sessions-history-tool.js";
import { createSessionsListTool } from "./tools/sessions-list-tool.js";
import { createSessionsSendTool } from "./tools/sessions-send-tool.js";
import { createSessionsSpawnTool } from "./tools/sessions-spawn-tool.js";
import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js";
export function createClawdbotTools(options?: {
browserControlUrl?: string;
@@ -56,6 +57,14 @@ export function createClawdbotTools(options?: {
config: options?.config,
agentSessionKey: options?.agentSessionKey,
});
const webSearchTool = createWebSearchTool({
config: options?.config,
sandboxed: options?.sandboxed,
});
const webFetchTool = createWebFetchTool({
config: options?.config,
sandboxed: options?.sandboxed,
});
const tools: AnyAgentTool[] = [
createBrowserTool({
defaultControlUrl: options?.browserControlUrl,
@@ -103,6 +112,8 @@ export function createClawdbotTools(options?: {
config: options?.config,
}),
...(memorySearchTool && memoryGetTool ? [memorySearchTool, memoryGetTool] : []),
...(webSearchTool ? [webSearchTool] : []),
...(webFetchTool ? [webFetchTool] : []),
...(imageTool ? [imageTool] : []),
];

View File

@@ -54,6 +54,8 @@ export function buildAgentSystemPrompt(params: {
ls: "List directory contents",
exec: "Run shell commands",
process: "Manage background exec sessions",
web_search: "Search the web (Brave API)",
web_fetch: "Fetch and extract readable content from a URL",
// Channel docking: add login tools here when a channel needs interactive linking.
browser: "Control web browser",
canvas: "Present/eval/snapshot the Canvas",
@@ -81,6 +83,8 @@ export function buildAgentSystemPrompt(params: {
"ls",
"exec",
"process",
"web_search",
"web_fetch",
"browser",
"canvas",
"nodes",

View File

@@ -277,6 +277,16 @@
"title": "Memory Get",
"detailKeys": ["path", "from", "lines"]
},
"web_search": {
"emoji": "🔎",
"title": "Web Search",
"detailKeys": ["query", "count"]
},
"web_fetch": {
"emoji": "📄",
"title": "Web Fetch",
"detailKeys": ["url", "extractMode", "maxChars"]
},
"whatsapp_login": {
"emoji": "🟢",
"title": "WhatsApp Login",

View File

@@ -13,6 +13,7 @@ const TOOL_NAME_ALIASES: Record<string, string> = {
export const TOOL_GROUPS: Record<string, string[]> = {
// NOTE: Keep canonical (lowercase) tool names here.
"group:memory": ["memory_search", "memory_get"],
"group:web": ["web_search", "web_fetch"],
// Basic workspace/file tools
"group:fs": ["read", "write", "edit", "apply_patch"],
// Host/runtime execution tools
@@ -49,6 +50,8 @@ export const TOOL_GROUPS: Record<string, string[]> = {
"session_status",
"memory_search",
"memory_get",
"web_search",
"web_fetch",
"image",
],
};

View File

@@ -0,0 +1,480 @@
import { Type } from "@sinclair/typebox";
import type { ClawdbotConfig } from "../../config/config.js";
import { VERSION } from "../../version.js";
import { stringEnum } from "../schema/typebox.js";
import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
// Search backends accepted by resolveSearchProvider (only Brave is listed).
const SEARCH_PROVIDERS = ["brave"] as const;
// Values accepted for the web_fetch `extractMode` parameter.
const EXTRACT_MODES = ["markdown", "text"] as const;
// Fallback result count when neither the tool call nor the config supplies one.
const DEFAULT_SEARCH_COUNT = 5;
// Upper bound applied to the result count (schema maximum and runtime clamp).
const MAX_SEARCH_COUNT = 10;
// Fallback character budget for fetched content.
const DEFAULT_FETCH_MAX_CHARS = 50_000;
// Fallback request timeout, in seconds, for both tools.
const DEFAULT_TIMEOUT_SECONDS = 30;
// Fallback cache lifetime, in minutes, for both tools.
const DEFAULT_CACHE_TTL_MINUTES = 15;
// Entry cap that writeCache enforces on each in-memory cache.
const DEFAULT_CACHE_MAX_ENTRIES = 100;
// Endpoint queried by runWebSearch.
const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
/**
 * Type of the `search` entry under the `web` section of the tools config,
 * derived from ClawdbotConfig instead of being redeclared here.
 * Resolves to `undefined` when the `web` section does not match a shape with a `search` key.
 */
type WebSearchConfig = NonNullable<ClawdbotConfig["tools"]>["web"] extends infer Web
? Web extends { search?: infer Search }
? Search
: undefined
: undefined;
/**
 * Type of the `fetch` entry under the `web` section of the tools config,
 * derived from ClawdbotConfig instead of being redeclared here.
 * Resolves to `undefined` when the `web` section does not match a shape with a `fetch` key.
 */
type WebFetchConfig = NonNullable<ClawdbotConfig["tools"]>["web"] extends infer Web
? Web extends { fetch?: infer Fetch }
? Fetch
: undefined
: undefined;
/** One cached payload plus the bookkeeping timestamps stored alongside it. */
type CacheEntry<T> = {
// The cached payload.
value: T;
// Epoch milliseconds after which readCache treats the entry as expired.
expiresAt: number;
// Epoch milliseconds at which writeCache stored the entry (not read by the code visible here).
insertedAt: number;
};
// Module-level in-memory caches, one per tool, keyed by the strings built in
// runWebSearch and runWebFetch respectively.
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
const FETCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
// Parameter schema for the web_search tool: a required query and an optional
// result count bounded to 1..MAX_SEARCH_COUNT.
const WebSearchSchema = Type.Object({
query: Type.String({ description: "Search query string." }),
count: Type.Optional(
Type.Number({
description: "Number of results to return (1-10).",
minimum: 1,
maximum: MAX_SEARCH_COUNT,
}),
),
});
// Parameter schema for the web_fetch tool: a required URL, an optional
// extraction mode (one of EXTRACT_MODES) and an optional character budget.
const WebFetchSchema = Type.Object({
url: Type.String({ description: "HTTP or HTTPS URL to fetch." }),
extractMode: Type.Optional(
stringEnum(EXTRACT_MODES, {
description: 'Extraction mode ("markdown" or "text").',
default: "markdown",
}),
),
maxChars: Type.Optional(
Type.Number({
description: "Maximum characters to return (truncates when exceeded).",
// Same floor that resolveMaxChars enforces at runtime.
minimum: 100,
}),
),
});
/** Fields read from a single Brave result entry; every field is treated as optional. */
type BraveSearchResult = {
title?: string;
url?: string;
description?: string;
// Surfaced to callers as `published` by runWebSearch.
age?: string;
};
/** Portion of the Brave response body that runWebSearch reads (`web.results`). */
type BraveSearchResponse = {
web?: {
results?: BraveSearchResult[];
};
};
/**
 * Pulls the `tools.web.search` section out of the config.
 *
 * @param cfg - Full configuration object, if any.
 * @returns The search section, or `undefined` when it is absent or not an object.
 */
function resolveSearchConfig(cfg?: ClawdbotConfig): WebSearchConfig {
  const section = cfg?.tools?.web?.search;
  const usable = Boolean(section) && typeof section === "object";
  return usable ? (section as WebSearchConfig) : undefined;
}
/**
 * Pulls the `tools.web.fetch` section out of the config.
 *
 * @param cfg - Full configuration object, if any.
 * @returns The fetch section, or `undefined` when it is absent or not an object.
 */
function resolveFetchConfig(cfg?: ClawdbotConfig): WebFetchConfig {
  const section = cfg?.tools?.web?.fetch;
  if (section && typeof section === "object") {
    return section as WebFetchConfig;
  }
  return undefined;
}
/**
 * Decides whether the web_search tool is switched on.
 *
 * An explicit boolean `enabled` in the config wins. Without one the tool is on,
 * regardless of the `sandboxed` flag.
 */
function resolveSearchEnabled(params: { search?: WebSearchConfig; sandboxed?: boolean }): boolean {
  const explicit = params.search?.enabled;
  return typeof explicit === "boolean" ? explicit : true;
}
/**
 * Decides whether the web_fetch tool is switched on.
 *
 * An explicit boolean `enabled` in the config wins. Without one the tool is on
 * only when `sandboxed` is truthy.
 */
function resolveFetchEnabled(params: { fetch?: WebFetchConfig; sandboxed?: boolean }): boolean {
  const explicit = params.fetch?.enabled;
  if (typeof explicit === "boolean") {
    return explicit;
  }
  return params.sandboxed ? true : false;
}
/**
 * Finds the Brave API key to use.
 *
 * A non-blank `apiKey` string in the search config takes precedence; otherwise
 * the `BRAVE_API_KEY` environment variable is used. Both are trimmed.
 *
 * @returns The key, or `undefined` when neither source yields a non-empty value.
 */
function resolveSearchApiKey(search?: WebSearchConfig): string | undefined {
  let configured = "";
  if (search && "apiKey" in search && typeof search.apiKey === "string") {
    configured = search.apiKey.trim();
  }
  if (configured) {
    return configured;
  }
  const envKey = (process.env.BRAVE_API_KEY ?? "").trim();
  return envKey ? envKey : undefined;
}
/**
 * Resolves the search provider name from the config.
 *
 * The configured value is trimmed and lowercased before matching. Because only
 * one provider exists, every input (including unknown names) resolves to "brave".
 */
function resolveSearchProvider(search?: WebSearchConfig): (typeof SEARCH_PROVIDERS)[number] {
  let requested = "";
  if (search && "provider" in search && typeof search.provider === "string") {
    requested = search.provider.trim().toLowerCase();
  }
  switch (requested) {
    case "brave":
      return "brave";
    default:
      // Missing or unrecognised names fall back to the only supported provider.
      return "brave";
  }
}
/**
 * Normalises a timeout setting to whole seconds.
 *
 * @param value - Candidate timeout; used only when it is a finite number.
 * @param fallback - Timeout used when `value` is unusable.
 * @returns The chosen timeout rounded down, never below 1.
 */
function resolveTimeoutSeconds(value: unknown, fallback: number): number {
  if (typeof value === "number" && Number.isFinite(value)) {
    return Math.max(1, Math.floor(value));
  }
  return Math.max(1, Math.floor(fallback));
}
/**
 * Converts a cache TTL expressed in minutes to milliseconds.
 *
 * @param value - Candidate TTL in minutes; used only when finite, and floored at 0.
 * @param fallbackMinutes - TTL in minutes used when `value` is unusable.
 * @returns The TTL in milliseconds, rounded to the nearest integer.
 */
function resolveCacheTtlMs(value: unknown, fallbackMinutes: number): number {
  let minutes = fallbackMinutes;
  if (typeof value === "number" && Number.isFinite(value)) {
    minutes = Math.max(0, value);
  }
  return Math.round(minutes * 60_000);
}
/**
 * Normalises the character budget for fetched content.
 *
 * @param value - Candidate budget; used only when it is a finite number.
 * @param fallback - Budget used when `value` is unusable.
 * @returns The chosen budget rounded down, never below 100.
 */
function resolveMaxChars(value: unknown, fallback: number): number {
  const usable = typeof value === "number" && Number.isFinite(value);
  const requested = usable ? (value as number) : fallback;
  const whole = Math.floor(requested);
  return Math.max(100, whole);
}
/**
 * Normalises the requested number of search results.
 *
 * @param value - Candidate count; used only when it is a finite number.
 * @param fallback - Count used when `value` is unusable.
 * @returns The chosen count rounded down and clamped to 1..MAX_SEARCH_COUNT.
 */
function resolveSearchCount(value: unknown, fallback: number): number {
  const requested = typeof value === "number" && Number.isFinite(value) ? value : fallback;
  const whole = Math.floor(requested);
  const capped = Math.min(MAX_SEARCH_COUNT, whole);
  return Math.max(1, capped);
}
/**
 * Canonicalises a cache key by trimming surrounding whitespace and lowercasing it,
 * so keys that differ only in case or padding map to the same entry.
 */
function normalizeCacheKey(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
/**
 * Looks up a cache entry, dropping it when it has expired.
 *
 * @param cache - Cache to read from; an expired entry is deleted as a side effect.
 * @param key - Key to look up.
 * @returns The stored value flagged `cached: true`, or `null` on a miss or expiry.
 */
function readCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
): { value: T; cached: boolean } | null {
  const hit = cache.get(key);
  if (hit) {
    const expired = Date.now() > hit.expiresAt;
    if (!expired) {
      return { value: hit.value, cached: true };
    }
    cache.delete(key);
  }
  return null;
}
/**
 * Stores a value in a bounded, insertion-ordered cache.
 *
 * Does nothing when `ttlMs` is zero or negative (caching disabled). When the
 * cache is full, the oldest entries are evicted first.
 *
 * @param cache - Cache to write to.
 * @param key - Key under which to store the value.
 * @param value - Payload to cache.
 * @param ttlMs - Lifetime of the entry in milliseconds.
 * @param maxEntries - Maximum number of entries kept; defaults to DEFAULT_CACHE_MAX_ENTRIES.
 */
function writeCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
  value: T,
  ttlMs: number,
  maxEntries: number = DEFAULT_CACHE_MAX_ENTRIES,
) {
  if (ttlMs <= 0) return;
  // Remove any existing entry first: a refresh must not count against the
  // capacity (which would evict an unrelated entry), and re-inserting moves the
  // key to the newest position in the Map's insertion order.
  cache.delete(key);
  const limit = Math.max(1, Math.floor(maxEntries));
  while (cache.size >= limit) {
    const oldest = cache.keys().next();
    if (oldest.done) break;
    cache.delete(oldest.value);
  }
  const now = Date.now();
  cache.set(key, {
    value,
    expiresAt: now + ttlMs,
    insertedAt: now,
  });
}
/**
 * Derives an AbortSignal that fires when the parent signal aborts or when the
 * timeout elapses, whichever comes first.
 *
 * @param signal - Optional parent signal to follow.
 * @param timeoutMs - Timeout in milliseconds; zero or negative disables the timer
 *   and returns the parent signal (or a fresh, never-aborted one).
 * @returns The derived signal.
 */
function withTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  if (timeoutMs <= 0) return signal ?? new AbortController().signal;
  const controller = new AbortController();
  // An already-aborted signal never dispatches another "abort" event, so it has
  // to be mirrored eagerly; no timer is needed in that case.
  if (signal?.aborted) {
    controller.abort();
    return controller.signal;
  }
  const onParentAbort = () => controller.abort();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  signal?.addEventListener("abort", onParentAbort, { once: true });
  controller.signal.addEventListener(
    "abort",
    () => {
      // Whichever side fired, release the other so neither outlives the request.
      clearTimeout(timer);
      signal?.removeEventListener("abort", onParentAbort);
    },
    { once: true },
  );
  return controller.signal;
}
/**
 * Decodes a small set of HTML character references in a single pass.
 *
 * Supported: `&nbsp;`, `&amp;`, `&quot;`, `&apos;`, `&lt;`, `&gt;` (case-insensitive)
 * and numeric references in decimal (`&#39;`) or hexadecimal (`&#x27;`) form.
 * Unknown names and out-of-range code points are left untouched.
 *
 * @param value - Text that may contain character references.
 * @returns The text with each reference decoded exactly once.
 */
function decodeEntities(value: string): string {
  // One combined pattern instead of chained replaces: the output of one
  // replacement is never rescanned, so "&amp;lt;" yields "&lt;" and not "<".
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi,
    (match: string, hex?: string, dec?: string, name?: string) => {
      if (name !== undefined) {
        switch (name.toLowerCase()) {
          case "nbsp":
            return " ";
          case "amp":
            return "&";
          case "quot":
            return '"';
          case "apos":
            return "'";
          case "lt":
            return "<";
          case "gt":
            return ">";
          default:
            return match;
        }
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
      // fromCodePoint handles characters beyond U+FFFF and throws outside the
      // Unicode range, so guard the range and keep the reference verbatim instead.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
/**
 * Removes anything that looks like an HTML tag (`<...>`) and then decodes
 * character references in what remains.
 */
function stripTags(value: string): string {
  const withoutTags = value.replace(/<[^>]+>/g, "");
  return decodeEntities(withoutTags);
}
/**
 * Tidies whitespace in extracted text: drops carriage returns, strips blanks
 * before line breaks, limits consecutive line breaks to two, collapses runs of
 * spaces/tabs to one space, and trims both ends.
 */
function normalizeWhitespace(value: string): string {
  const withoutCarriageReturns = value.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withCappedBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  const withSingleSpaces = withCappedBlankLines.replace(/[ \t]{2,}/g, " ");
  return withSingleSpaces.trim();
}
/**
 * Converts an HTML document to a lightweight markdown rendering.
 *
 * Script, style and noscript elements are removed; links become `[label](href)`,
 * headings become `#`-prefixed lines, list items become `- ` bullets, and a set
 * of block-level closing tags become line breaks. All remaining tags are dropped.
 *
 * @param html - Raw HTML source.
 * @returns The rendered text and, when a `<title>` element is present, its content.
 */
function htmlToMarkdown(html: string): { text: string; title?: string } {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;

  // Labels are kept entity-encoded here because the whole text goes through
  // stripTags() once at the end. Decoding them early would turn escaped text such
  // as "&lt;b&gt;" into "<b>", which the final pass would then delete as a tag
  // (and "&amp;lt;" would be decoded twice).
  const inlineLabel = (fragment: string): string => {
    const raw = normalizeWhitespace(fragment.replace(/<[^>]+>/g, ""));
    // Emptiness is judged on the decoded form so "&nbsp;" still counts as empty.
    return normalizeWhitespace(decodeEntities(raw)) ? raw : "";
  };

  let text = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "");
  text = text.replace(/<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi, (_, href, body) => {
    const label = inlineLabel(body);
    // A link without visible text is represented by its bare target.
    if (!label) return href;
    return `[${label}](${href})`;
  });
  text = text.replace(/<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi, (_, level, body) => {
    const prefix = "#".repeat(Math.max(1, Math.min(6, Number.parseInt(level, 10))));
    const label = inlineLabel(body);
    return `\n${prefix} ${label}\n`;
  });
  text = text.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_, body) => {
    const label = inlineLabel(body);
    return label ? `\n- ${label}` : "";
  });
  text = text
    .replace(/<(br|hr)\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|header|footer|table|tr|ul|ol)>/gi, "\n");
  // Single place where leftover tags are removed and entities are decoded.
  text = stripTags(text);
  text = normalizeWhitespace(text);
  return { text, title };
}
/**
 * Converts an HTML document to plain text.
 *
 * Reuses the markdown rendering and then removes the markdown-only syntax it
 * introduces, so "text" mode differs from "markdown" mode: links are reduced to
 * their label and heading markers are dropped. List bullets are kept because
 * they read naturally as plain text.
 *
 * @param html - Raw HTML source.
 * @returns The plain text and, when present, the document title.
 */
function htmlToText(html: string): { text: string; title?: string } {
  const { text: markdown, title } = htmlToMarkdown(html);
  const text = normalizeWhitespace(
    markdown
      // [label](target) -> label
      .replace(/\[([^\]\n]+)\]\([^)\n]+\)/g, "$1")
      // "## Heading" -> "Heading"
      .replace(/^#{1,6} /gm, ""),
  );
  return { text, title };
}
/**
 * Cuts text down to a maximum number of UTF-16 code units.
 *
 * @param value - Text to limit.
 * @param maxChars - Maximum length; negative values are treated as 0.
 * @returns The (possibly shortened) text and whether anything was removed. The
 *   result may be one unit shorter than `maxChars` so that a surrogate pair is
 *   never split in half.
 */
function truncateText(value: string, maxChars: number): { text: string; truncated: boolean } {
  if (value.length <= maxChars) return { text: value, truncated: false };
  let end = Math.max(0, Math.floor(maxChars));
  if (end > 0) {
    const last = value.charCodeAt(end - 1);
    const next = value.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    // Cutting here would leave a lone high surrogate, i.e. malformed text.
    if (splitsPair) end -= 1;
  }
  return { text: value.slice(0, end), truncated: true };
}
/**
 * Extracts the hostname from a URL string.
 *
 * @param url - URL to inspect.
 * @returns The hostname, or `undefined` when the input is empty or not a parseable URL.
 */
function resolveSiteName(url: string | undefined): string | undefined {
  let host: string | undefined;
  if (url) {
    try {
      host = new URL(url).hostname;
    } catch {
      host = undefined;
    }
  }
  return host;
}
/**
 * Reads a response body as text on a best-effort basis.
 *
 * @param res - Response whose body should be read.
 * @returns The body text, or an empty string when reading fails.
 */
async function readResponseText(res: Response): Promise<string> {
  let body = "";
  try {
    body = await res.text();
  } catch {
    // Best effort: a failed read is reported as an empty body.
  }
  return body;
}
/**
 * Runs a web search against the Brave Search API, with caching.
 *
 * A cached payload for the same provider/query/count is returned (flagged
 * `cached: true`) without a network call. Otherwise the API is queried and the
 * mapped results are cached for `cacheTtlMs`.
 *
 * @returns A payload containing the query, provider, result count, elapsed time and results.
 * @throws Error when the provider is unsupported or the API responds with a non-OK status.
 */
async function runWebSearch(params: {
  query: string;
  count: number;
  apiKey: string;
  timeoutSeconds: number;
  cacheTtlMs: number;
  provider: (typeof SEARCH_PROVIDERS)[number];
}): Promise<Record<string, unknown>> {
  const { provider, query, count } = params;
  const cacheKey = normalizeCacheKey(`${provider}:${query}:${count}`);
  const hit = readCache(SEARCH_CACHE, cacheKey);
  if (hit) {
    return { ...hit.value, cached: true };
  }

  const startedAt = Date.now();
  if (provider !== "brave") {
    throw new Error("Unsupported web search provider.");
  }

  const endpoint = new URL(BRAVE_SEARCH_ENDPOINT);
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("count", String(count));

  const timeoutMs = params.timeoutSeconds * 1000;
  const response = await fetch(endpoint.toString(), {
    method: "GET",
    headers: {
      Accept: "application/json",
      "X-Subscription-Token": params.apiKey,
    },
    signal: withTimeout(undefined, timeoutMs),
  });
  if (!response.ok) {
    const detail = await readResponseText(response);
    throw new Error(`Brave Search API error (${response.status}): ${detail || response.statusText}`);
  }

  const data = (await response.json()) as BraveSearchResponse;
  const rawResults = data.web?.results;
  const entries: BraveSearchResult[] = Array.isArray(rawResults) ? rawResults : [];
  const results = entries.map((entry) => {
    const link = entry.url ?? "";
    return {
      title: entry.title ?? "",
      url: link,
      description: entry.description ?? "",
      published: entry.age ?? undefined,
      siteName: resolveSiteName(link),
    };
  });

  const payload = {
    query,
    provider,
    count: results.length,
    tookMs: Date.now() - startedAt,
    results,
  };
  writeCache(SEARCH_CACHE, cacheKey, payload, params.cacheTtlMs);
  return payload;
}
/**
 * Fetches a URL and extracts readable text from the response, with caching.
 *
 * HTML is converted according to `extractMode`, JSON is pretty-printed, and any
 * other content type is returned as-is. The text is cut to `maxChars`.
 *
 * @returns A payload with the request/final URL, status, content type, optional
 *   title, extraction mode, truncation flag, text length, timestamps and text.
 * @throws Error when the URL is not a valid http(s) URL or the response status is not OK.
 */
async function runWebFetch(params: {
  url: string;
  extractMode: (typeof EXTRACT_MODES)[number];
  maxChars: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  userAgent: string;
}): Promise<Record<string, unknown>> {
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(params.url);
  } catch {
    throw new Error("Invalid URL: must be http or https");
  }
  if (!["http:", "https:"].includes(parsedUrl.protocol)) {
    throw new Error("Invalid URL: must be http or https");
  }
  // The key is built from the parsed URL and deliberately NOT lowercased: URL
  // paths and query strings are case-sensitive, so "/Page" and "/page" must not
  // share a cache entry. URL parsing already canonicalises scheme and host case.
  const cacheKey = `fetch:${parsedUrl.toString()}:${params.extractMode}:${params.maxChars}`;
  const cached = readCache(FETCH_CACHE, cacheKey);
  if (cached) return { ...cached.value, cached: true };
  // NOTE(review): the target host is not restricted here, so loopback and
  // private-network addresses are reachable — confirm whether an SSRF guard is
  // applied elsewhere before exposing this to untrusted prompts.
  const start = Date.now();
  const res = await fetch(parsedUrl.toString(), {
    method: "GET",
    headers: {
      Accept: "*/*",
      "User-Agent": params.userAgent,
    },
    signal: withTimeout(undefined, params.timeoutSeconds * 1000),
  });
  if (!res.ok) {
    // Error pages can be arbitrarily large; keep the message within the same
    // budget that applies to successful content.
    const detail = truncateText(await readResponseText(res), params.maxChars).text;
    throw new Error(`Web fetch failed (${res.status}): ${detail || res.statusText}`);
  }
  const contentType = res.headers.get("content-type") ?? "application/octet-stream";
  const body = await readResponseText(res);
  let title: string | undefined;
  let text = body;
  if (contentType.includes("text/html")) {
    const parsed = params.extractMode === "text" ? htmlToText(body) : htmlToMarkdown(body);
    text = parsed.text;
    title = parsed.title;
  } else if (contentType.includes("application/json")) {
    try {
      text = JSON.stringify(JSON.parse(body), null, 2);
    } catch {
      // Not valid JSON despite the header: fall back to the raw body.
      text = body;
    }
  }
  const truncated = truncateText(text, params.maxChars);
  const payload = {
    url: params.url,
    finalUrl: res.url || params.url,
    status: res.status,
    contentType,
    title,
    extractMode: params.extractMode,
    truncated: truncated.truncated,
    length: truncated.text.length,
    fetchedAt: new Date().toISOString(),
    tookMs: Date.now() - start,
    text: truncated.text,
  };
  writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
  return payload;
}
/**
 * Builds the `web_search` agent tool.
 *
 * @param options - Optional config and sandbox flag used to decide availability.
 * @returns The tool, or `null` when search is disabled or no API key can be resolved.
 */
export function createWebSearchTool(options?: {
  config?: ClawdbotConfig;
  sandboxed?: boolean;
}): AnyAgentTool | null {
  const searchConfig = resolveSearchConfig(options?.config);
  const enabled = resolveSearchEnabled({ search: searchConfig, sandboxed: options?.sandboxed });
  // The key is only looked up when the tool is enabled.
  const apiKey = enabled ? resolveSearchApiKey(searchConfig) : undefined;
  if (!apiKey) {
    return null;
  }
  return {
    label: "Web Search",
    name: "web_search",
    description:
      "Search the web using Brave Search API. Returns titles, URLs, and snippets for fast research.",
    parameters: WebSearchSchema,
    execute: async (_toolCallId, args) => {
      const input = args as Record<string, unknown>;
      const query = readStringParam(input, "query", { required: true });
      // A per-call count wins over the configured default.
      const requested = readNumberParam(input, "count", { integer: true });
      const count = requested ?? searchConfig?.maxResults ?? undefined;
      const payload = await runWebSearch({
        query,
        count: resolveSearchCount(count, DEFAULT_SEARCH_COUNT),
        apiKey,
        timeoutSeconds: resolveTimeoutSeconds(searchConfig?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
        cacheTtlMs: resolveCacheTtlMs(searchConfig?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
        provider: resolveSearchProvider(searchConfig),
      });
      return jsonResult(payload);
    },
  };
}
/**
 * Builds the `web_fetch` agent tool.
 *
 * @param options - Optional config and sandbox flag used to decide availability.
 * @returns The tool, or `null` when fetching is disabled.
 */
export function createWebFetchTool(options?: {
  config?: ClawdbotConfig;
  sandboxed?: boolean;
}): AnyAgentTool | null {
  const fetchConfig = resolveFetchConfig(options?.config);
  const enabled = resolveFetchEnabled({ fetch: fetchConfig, sandboxed: options?.sandboxed });
  if (!enabled) {
    return null;
  }
  // A non-empty configured User-Agent overrides the versioned default.
  const configuredAgent =
    fetchConfig && "userAgent" in fetchConfig && typeof fetchConfig.userAgent === "string"
      ? fetchConfig.userAgent
      : "";
  const userAgent = configuredAgent || `clawdbot/${VERSION}`;
  return {
    label: "Web Fetch",
    name: "web_fetch",
    description:
      "Fetch and extract readable content from a URL (HTML → markdown/text). Use for lightweight page access without browser automation.",
    parameters: WebFetchSchema,
    execute: async (_toolCallId, args) => {
      const input = args as Record<string, unknown>;
      const url = readStringParam(input, "url", { required: true });
      // Anything other than the literal "text" selects markdown extraction.
      const requestedMode = readStringParam(input, "extractMode");
      const extractMode = requestedMode === "text" ? "text" : "markdown";
      const requestedMaxChars = readNumberParam(input, "maxChars", { integer: true });
      const payload = await runWebFetch({
        url,
        extractMode,
        maxChars: resolveMaxChars(requestedMaxChars ?? fetchConfig?.maxChars, DEFAULT_FETCH_MAX_CHARS),
        timeoutSeconds: resolveTimeoutSeconds(fetchConfig?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
        cacheTtlMs: resolveCacheTtlMs(fetchConfig?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
        userAgent,
      });
      return jsonResult(payload);
    },
  };
}

View File

@@ -105,6 +105,17 @@ const FIELD_LABELS: Record<string, string> = {
"agents.list[].tools.byProvider": "Agent Tool Policy by Provider",
"tools.exec.applyPatch.enabled": "Enable apply_patch",
"tools.exec.applyPatch.allowModels": "apply_patch Model Allowlist",
"tools.web.search.enabled": "Enable Web Search Tool",
"tools.web.search.provider": "Web Search Provider",
"tools.web.search.apiKey": "Brave Search API Key",
"tools.web.search.maxResults": "Web Search Max Results",
"tools.web.search.timeoutSeconds": "Web Search Timeout (sec)",
"tools.web.search.cacheTtlMinutes": "Web Search Cache TTL (min)",
"tools.web.fetch.enabled": "Enable Web Fetch Tool",
"tools.web.fetch.maxChars": "Web Fetch Max Chars",
"tools.web.fetch.timeoutSeconds": "Web Fetch Timeout (sec)",
"tools.web.fetch.cacheTtlMinutes": "Web Fetch Cache TTL (min)",
"tools.web.fetch.userAgent": "Web Fetch User-Agent",
"gateway.controlUi.basePath": "Control UI Base Path",
"gateway.http.endpoints.chatCompletions.enabled": "OpenAI Chat Completions Endpoint",
"gateway.reload.mode": "Config Reload Mode",
@@ -219,6 +230,17 @@ const FIELD_HELP: Record<string, string> = {
"Experimental. Enables apply_patch for OpenAI models when allowed by tool policy.",
"tools.exec.applyPatch.allowModels":
'Optional allowlist of model ids (e.g. "gpt-5.2" or "openai/gpt-5.2").',
"tools.web.search.enabled": "Enable the web_search tool (requires Brave API key).",
"tools.web.search.provider": 'Search provider (only "brave" supported today).',
"tools.web.search.apiKey": "Brave Search API key (fallback: BRAVE_API_KEY env var).",
"tools.web.search.maxResults": "Default number of results to return (1-10).",
"tools.web.search.timeoutSeconds": "Timeout in seconds for web_search requests.",
"tools.web.search.cacheTtlMinutes": "Cache TTL in minutes for web_search results.",
"tools.web.fetch.enabled": "Enable the web_fetch tool (lightweight HTTP fetch).",
"tools.web.fetch.maxChars": "Max characters returned by web_fetch (truncated).",
"tools.web.fetch.timeoutSeconds": "Timeout in seconds for web_fetch requests.",
"tools.web.fetch.cacheTtlMinutes": "Cache TTL in minutes for web_fetch results.",
"tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
"channels.slack.allowBots":
"Allow bot-authored messages to trigger Slack replies (default: false).",
"auth.profiles": "Named auth profiles (provider + mode + optional email).",

View File

@@ -73,6 +73,34 @@ export type ToolsConfig = {
profile?: ToolProfileId;
allow?: string[];
deny?: string[];
web?: {
search?: {
/** Enable web search tool (default: true when API key is present). */
enabled?: boolean;
/** Search provider (currently "brave"). */
provider?: "brave";
/** Brave Search API key (optional; defaults to BRAVE_API_KEY env var). */
apiKey?: string;
/** Default search results count (1-10). */
maxResults?: number;
/** Timeout in seconds for search requests. */
timeoutSeconds?: number;
/** Cache TTL in minutes for search results. */
cacheTtlMinutes?: number;
};
fetch?: {
/** Enable web fetch tool (default: false). */
enabled?: boolean;
/** Max characters to return from fetched content. */
maxChars?: number;
/** Timeout in seconds for fetch requests. */
timeoutSeconds?: number;
/** Cache TTL in minutes for fetched content. */
cacheTtlMinutes?: number;
/** Override User-Agent header for fetch requests. */
userAgent?: string;
};
};
audio?: {
transcription?: {
/** CLI args (template-enabled). */

View File

@@ -114,6 +114,34 @@ export const ToolPolicySchema = z
})
.optional();
/** Validation schema for the optional `tools.web.search` config section. */
export const ToolsWebSearchSchema = z
.object({
enabled: z.boolean().optional(),
// Only the literal "brave" is accepted.
provider: z.union([z.literal("brave")]).optional(),
apiKey: z.string().optional(),
// Positive integer; no upper bound is enforced by this schema.
maxResults: z.number().int().positive().optional(),
timeoutSeconds: z.number().int().positive().optional(),
// Zero is allowed, fractions too.
cacheTtlMinutes: z.number().nonnegative().optional(),
})
.optional();
/** Validation schema for the optional `tools.web.fetch` config section. */
export const ToolsWebFetchSchema = z
.object({
enabled: z.boolean().optional(),
maxChars: z.number().int().positive().optional(),
timeoutSeconds: z.number().int().positive().optional(),
// Zero is allowed, fractions too.
cacheTtlMinutes: z.number().nonnegative().optional(),
userAgent: z.string().optional(),
})
.optional();
/** Validation schema for the optional `tools.web` section, grouping the search and fetch schemas. */
export const ToolsWebSchema = z
.object({
search: ToolsWebSearchSchema,
fetch: ToolsWebFetchSchema,
})
.optional();
export const ToolProfileSchema = z
.union([z.literal("minimal"), z.literal("coding"), z.literal("messaging"), z.literal("full")])
.optional();
@@ -245,6 +273,7 @@ export const ToolsSchema = z
profile: ToolProfileSchema,
allow: z.array(z.string()).optional(),
deny: z.array(z.string()).optional(),
web: ToolsWebSchema,
audio: z
.object({
transcription: ToolsAudioTranscriptionSchema,