|
|
|
|
@@ -0,0 +1,480 @@
|
|
|
|
|
import { Type } from "@sinclair/typebox";
|
|
|
|
|
|
|
|
|
|
import type { ClawdbotConfig } from "../../config/config.js";
|
|
|
|
|
import { VERSION } from "../../version.js";
|
|
|
|
|
import { stringEnum } from "../schema/typebox.js";
|
|
|
|
|
import type { AnyAgentTool } from "./common.js";
|
|
|
|
|
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
|
|
|
|
|
|
|
|
|
|
/** Search backends this module can call; "brave" is the only entry. */
const SEARCH_PROVIDERS = ["brave"] as const;
|
|
|
|
|
/** Values accepted by the `extractMode` parameter of the web_fetch tool. */
const EXTRACT_MODES = ["markdown", "text"] as const;
|
|
|
|
|
|
|
|
|
|
/** Result count used when neither the tool call nor the config supplies one. */
const DEFAULT_SEARCH_COUNT = 5;
|
|
|
|
|
/** Upper bound on results per search; used by the tool schema and by resolveSearchCount. */
const MAX_SEARCH_COUNT = 10;
|
|
|
|
|
/** Character cap applied to fetched text when neither the tool call nor the config sets `maxChars`. */
const DEFAULT_FETCH_MAX_CHARS = 50_000;
|
|
|
|
|
/** Request timeout, in seconds, used by both tools when the config gives none. */
const DEFAULT_TIMEOUT_SECONDS = 30;
|
|
|
|
|
/** Cache lifetime, in minutes, used by both tools when the config gives none. */
const DEFAULT_CACHE_TTL_MINUTES = 15;
|
|
|
|
|
/** Size at which writeCache evicts the first-inserted key before storing a new entry. */
const DEFAULT_CACHE_MAX_ENTRIES = 100;
|
|
|
|
|
|
|
|
|
|
/** Brave web-search endpoint queried by runWebSearch. */
const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
|
|
|
|
|
|
|
|
|
|
/**
 * Type of the `search` member of the config's `tools.web` section,
 * or `undefined` when that section declares no `search` member.
 */
type WebSearchConfig = NonNullable<ClawdbotConfig["tools"]>["web"] extends infer WebSection
  ? WebSection extends { search?: infer SearchSection }
    ? SearchSection
    : undefined
  : undefined;
|
|
|
|
|
|
|
|
|
|
/**
 * Type of the `fetch` member of the config's `tools.web` section,
 * or `undefined` when that section declares no `fetch` member.
 */
type WebFetchConfig = NonNullable<ClawdbotConfig["tools"]>["web"] extends infer WebSection
  ? WebSection extends { fetch?: infer FetchSection }
    ? FetchSection
    : undefined
  : undefined;
|
|
|
|
|
|
|
|
|
|
/** A cached value plus its bookkeeping timestamps (milliseconds, as returned by `Date.now()`). */
type CacheEntry<T> = {
  /** The cached payload. */
  value: T;
  /** Moment the entry was stored. */
  insertedAt: number;
  /** Moment after which readCache discards the entry. */
  expiresAt: number;
};
|
|
|
|
|
|
|
|
|
|
/** Module-level cache of search payloads; runWebSearch keys it by provider, query and count. */
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
|
|
|
|
|
/** Module-level cache of fetch payloads; runWebFetch keys it by URL, extract mode and max chars. */
const FETCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
|
|
|
|
|
|
|
|
|
|
/**
 * Parameter schema for the `web_search` tool.
 *
 * The advertised range in the `count` description is derived from
 * MAX_SEARCH_COUNT so the text and the `maximum` constraint cannot drift apart.
 */
const WebSearchSchema = Type.Object({
  query: Type.String({ description: "Search query string." }),
  count: Type.Optional(
    Type.Number({
      description: `Number of results to return (1-${MAX_SEARCH_COUNT}).`,
      minimum: 1,
      maximum: MAX_SEARCH_COUNT,
    }),
  ),
});
|
|
|
|
|
|
|
|
|
|
/** Parameter schema for the `web_fetch` tool. */
const WebFetchSchema = Type.Object({
  // Address to retrieve; runWebFetch rejects anything that is not http(s).
  url: Type.String({ description: "HTTP or HTTPS URL to fetch." }),
  // Output format; one of EXTRACT_MODES.
  extractMode: Type.Optional(
    stringEnum(EXTRACT_MODES, {
      description: "Extraction mode (\"markdown\" or \"text\").",
      default: "markdown",
    }),
  ),
  // Optional cap on the amount of text returned.
  maxChars: Type.Optional(
    Type.Number({
      description: "Maximum characters to return (truncates when exceeded).",
      minimum: 100,
    }),
  ),
});
|
|
|
|
|
|
|
|
|
|
/** Fields of a single Brave web result that runWebSearch reads; all are optional. */
type BraveSearchResult = {
  url?: string;
  title?: string;
  description?: string;
  /** Surfaced to callers as the `published` field. */
  age?: string;
};
|
|
|
|
|
|
|
|
|
|
/** Portion of the Brave search response body that runWebSearch inspects. */
type BraveSearchResponse = {
  web?: { results?: Array<BraveSearchResult> };
};
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the `tools.web.search` section from the config.
 *
 * @param cfg - Full configuration; may be omitted.
 * @returns The section when it is a non-null object, otherwise `undefined`.
 */
function resolveSearchConfig(cfg?: ClawdbotConfig): WebSearchConfig {
  const section = cfg?.tools?.web?.search;
  const isObject = typeof section === "object" && section !== null;
  return isObject ? (section as WebSearchConfig) : undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the `tools.web.fetch` section from the config.
 *
 * @param cfg - Full configuration; may be omitted.
 * @returns The section when it is a non-null object, otherwise `undefined`.
 */
function resolveFetchConfig(cfg?: ClawdbotConfig): WebFetchConfig {
  const section = cfg?.tools?.web?.fetch;
  const isObject = typeof section === "object" && section !== null;
  return isObject ? (section as WebFetchConfig) : undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Decides whether the web_search tool should be offered.
 *
 * An explicit boolean `search.enabled` wins. Without one, search is enabled
 * for sandboxed and non-sandboxed sessions alike.
 */
function resolveSearchEnabled(params: { search?: WebSearchConfig; sandboxed?: boolean }): boolean {
  const explicit = params.search?.enabled;
  // Both the sandboxed and the regular path default to "on".
  return typeof explicit === "boolean" ? explicit : true;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Decides whether the web_fetch tool should be offered.
 *
 * An explicit boolean `fetch.enabled` wins. Without one, fetch is enabled
 * only for sandboxed sessions.
 */
function resolveFetchEnabled(params: { fetch?: WebFetchConfig; sandboxed?: boolean }): boolean {
  const explicit = params.fetch?.enabled;
  if (typeof explicit === "boolean") return explicit;
  return Boolean(params.sandboxed);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finds the Brave API key.
 *
 * A non-blank string `apiKey` in the search config takes precedence; otherwise
 * the `BRAVE_API_KEY` environment variable is used. Values are trimmed.
 *
 * @returns The key, or `undefined` when neither source yields a non-blank value.
 */
function resolveSearchApiKey(search?: WebSearchConfig): string | undefined {
  let configured = "";
  if (search && "apiKey" in search && typeof search.apiKey === "string") {
    configured = search.apiKey.trim();
  }
  if (configured) return configured;

  const envKey = (process.env.BRAVE_API_KEY ?? "").trim();
  return envKey || undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Picks the search provider named in the config.
 *
 * The configured name is trimmed and lower-cased before matching. Any missing
 * or unrecognised name resolves to "brave".
 */
function resolveSearchProvider(search?: WebSearchConfig): (typeof SEARCH_PROVIDERS)[number] {
  let requested = "";
  if (search && "provider" in search && typeof search.provider === "string") {
    requested = search.provider.trim().toLowerCase();
  }
  switch (requested) {
    case "brave":
      return "brave";
    default:
      // Unknown or absent names fall back to Brave.
      return "brave";
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Normalises a timeout setting to whole seconds.
 *
 * @param value - Candidate setting; used only when it is a finite number.
 * @param fallback - Used in place of an unusable `value`.
 * @returns The chosen number rounded down, never less than 1.
 */
function resolveTimeoutSeconds(value: unknown, fallback: number): number {
  let seconds = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    seconds = value;
  }
  return Math.max(1, Math.floor(seconds));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a cache lifetime in minutes to milliseconds.
 *
 * @param value - Candidate minutes; used only when it is a finite number, with negatives raised to 0.
 * @param fallbackMinutes - Minutes used in place of an unusable `value`.
 * @returns The lifetime in milliseconds, rounded to the nearest integer.
 */
function resolveCacheTtlMs(value: unknown, fallbackMinutes: number): number {
  let minutes = fallbackMinutes;
  if (typeof value === "number" && Number.isFinite(value)) {
    minutes = Math.max(0, value);
  }
  return Math.round(minutes * 60_000);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Normalises a character limit.
 *
 * @param value - Candidate limit; used only when it is a finite number.
 * @param fallback - Used in place of an unusable `value`.
 * @returns The chosen number rounded down, never less than 100.
 */
function resolveMaxChars(value: unknown, fallback: number): number {
  let limit = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    limit = value;
  }
  return Math.max(100, Math.floor(limit));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Normalises a requested result count.
 *
 * @param value - Candidate count; used only when it is a finite number.
 * @param fallback - Used in place of an unusable `value`.
 * @returns The chosen number rounded down and clamped to 1..MAX_SEARCH_COUNT.
 */
function resolveSearchCount(value: unknown, fallback: number): number {
  let requested = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    requested = value;
  }
  const capped = Math.min(MAX_SEARCH_COUNT, Math.floor(requested));
  return Math.max(1, capped);
}
|
|
|
|
|
|
|
|
|
|
/** Canonicalises a cache key by trimming surrounding whitespace and lower-casing it. */
function normalizeCacheKey(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
|
|
|
|
|
|
|
|
|
/**
 * Looks up a cache entry, discarding it when it has expired.
 *
 * @param cache - Map to read from; an expired entry is deleted as a side effect.
 * @param key - Key to look up.
 * @returns The stored value flagged `cached: true`, or `null` on a miss or an expired entry.
 */
function readCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
): { value: T; cached: boolean } | null {
  const hit = cache.get(key);
  if (!hit) return null;

  const expired = Date.now() > hit.expiresAt;
  if (!expired) {
    return { value: hit.value, cached: true };
  }
  cache.delete(key);
  return null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Stores a value in the cache with a time-to-live.
 *
 * Nothing is stored when `ttlMs` is zero or negative. When the cache already
 * holds DEFAULT_CACHE_MAX_ENTRIES entries, the first-inserted key is evicted
 * to make room.
 *
 * @param cache - Map to write to.
 * @param key - Key under which to store the value.
 * @param value - Payload to cache.
 * @param ttlMs - Lifetime of the entry in milliseconds.
 */
function writeCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
  value: T,
  ttlMs: number,
): void {
  if (ttlMs <= 0) return;

  // Remove any previous entry for this key first. Otherwise a refresh at
  // capacity would evict an unrelated entry, and the refreshed entry would
  // keep its old insertion position and be evicted ahead of older keys.
  cache.delete(key);

  if (cache.size >= DEFAULT_CACHE_MAX_ENTRIES) {
    // Map iterates in insertion order, so the first key is the oldest one.
    const oldest = cache.keys().next();
    if (!oldest.done) cache.delete(oldest.value);
  }

  const now = Date.now();
  cache.set(key, { value, expiresAt: now + ttlMs, insertedAt: now });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Derives an abort signal that fires when `signal` aborts or after `timeoutMs`
 * milliseconds, whichever comes first.
 *
 * @param signal - Optional parent signal to follow.
 * @param timeoutMs - Timeout in milliseconds. When zero or negative no timer is
 *   created and the parent signal (or a fresh never-aborted signal) is returned.
 * @returns The derived signal.
 */
function withTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  if (timeoutMs <= 0) return signal ?? new AbortController().signal;

  const controller = new AbortController();

  // An already-aborted parent never dispatches another "abort" event, so it
  // has to be checked up front instead of relying on the listener below.
  if (signal?.aborted) {
    controller.abort();
    return controller.signal;
  }

  const onParentAbort = () => controller.abort();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  signal?.addEventListener("abort", onParentAbort, { once: true });

  // Whichever source triggers the abort, release the timer and detach from the
  // parent so a long-lived parent signal does not accumulate dead listeners.
  controller.signal.addEventListener(
    "abort",
    () => {
      clearTimeout(timer);
      signal?.removeEventListener("abort", onParentAbort);
    },
    { once: true },
  );
  return controller.signal;
}
|
|
|
|
|
|
|
|
|
|
/** Named entities recognised by decodeEntities; `nbsp` maps to a plain space. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["quot", '"'],
  ["apos", "'"],
  ["lt", "<"],
  ["gt", ">"],
]);

/**
 * Decodes HTML character references in a string.
 *
 * Handles the named entities in NAMED_HTML_ENTITIES (case-insensitively) plus
 * decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references. Unknown
 * names and out-of-range code points are left untouched.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param value - Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeEntities(value: string): string {
  return value.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match: string, body: string) => {
    if (body.charAt(0) !== "#") {
      return NAMED_HTML_ENTITIES.get(body.toLowerCase()) ?? match;
    }
    const isHex = body.charAt(1).toLowerCase() === "x";
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // String.fromCodePoint throws above U+10FFFF; keep such references as written.
    if (!(codePoint <= 0x10ffff)) return match;
    // fromCodePoint (unlike fromCharCode) handles characters outside the BMP.
    return String.fromCodePoint(codePoint);
  });
}
|
|
|
|
|
|
|
|
|
|
/** Removes everything that looks like an HTML tag, then decodes entities in what remains. */
function stripTags(value: string): string {
  const withoutMarkup = value.replace(/<[^>]+>/g, "");
  return decodeEntities(withoutMarkup);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tidies whitespace in extracted text.
 *
 * In order: drops carriage returns, removes spaces/tabs before a newline,
 * collapses three or more newlines to two, collapses runs of spaces/tabs to a
 * single space, and finally trims both ends.
 */
function normalizeWhitespace(value: string): string {
  const passes: Array<[RegExp, string]> = [
    [/\r/g, ""],
    [/[ \t]+\n/g, "\n"],
    [/\n{3,}/g, "\n\n"],
    [/[ \t]{2,}/g, " "],
  ];
  let result = value;
  for (const [pattern, replacement] of passes) {
    result = result.replace(pattern, replacement);
  }
  return result.trim();
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts an HTML document to lightweight markdown using regular expressions.
 *
 * Script, style and noscript elements are removed; anchors become
 * `[label](href)`, headings become `#`-prefixed lines, list items become
 * `- ` bullets, and common block-level closing tags become newlines. Remaining
 * tags are stripped and entities decoded.
 *
 * Entities are decoded exactly once, in the final pass. Inner fragments
 * (link labels, headings, list items) only have their tags removed, so escaped
 * markup such as `&lt;b&gt;` survives as literal text instead of being decoded
 * early and then deleted by the final tag strip.
 *
 * @param html - Raw HTML source.
 * @returns The converted text and, when present, the contents of `<title>`.
 */
function htmlToMarkdown(html: string): { text: string; title?: string } {
  // Tag removal and tidy-up without entity decoding. Yields "" when the
  // fragment has no visible text once decoded (e.g. only `&nbsp;`).
  const inlineLabel = (fragment: string): string => {
    const raw = normalizeWhitespace(fragment.replace(/<[^>]+>/g, ""));
    return normalizeWhitespace(decodeEntities(raw)) ? raw : "";
  };

  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;

  let text = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "");

  text = text.replace(
    /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_match: string, href: string, body: string) => {
      const label = inlineLabel(body);
      return label ? `[${label}](${href})` : href;
    },
  );

  text = text.replace(
    /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi,
    (_match: string, level: string, body: string) => {
      const prefix = "#".repeat(Math.max(1, Math.min(6, Number.parseInt(level, 10))));
      return `\n${prefix} ${inlineLabel(body)}\n`;
    },
  );

  text = text.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_match: string, body: string) => {
    const label = inlineLabel(body);
    return label ? `\n- ${label}` : "";
  });

  text = text
    .replace(/<(br|hr)\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|header|footer|table|tr|ul|ol)>/gi, "\n");

  // Single strip-and-decode pass over the whole document.
  text = normalizeWhitespace(stripTags(text));
  return { text, title };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts an HTML document to plain text.
 *
 * Unlike htmlToMarkdown, no markdown syntax is emitted: links are reduced to
 * their label, and headings and list items to their text on a line of their
 * own. Script, style and noscript elements are removed, block-level closing
 * tags become newlines, remaining tags are stripped and entities decoded.
 *
 * @param html - Raw HTML source.
 * @returns The plain text and, when present, the contents of `<title>`.
 */
function htmlToText(html: string): { text: string; title?: string } {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined;

  const withBreaks = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "")
    .replace(/<(br|hr)\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|header|footer|table|tr|ul|ol|li|h[1-6])>/gi, "\n");

  return { text: normalizeWhitespace(stripTags(withBreaks)), title };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cuts text down to at most `maxChars` UTF-16 code units.
 *
 * The cut never lands between the two halves of a surrogate pair: when it
 * would, the cut moves one unit earlier so the result holds no lone surrogate.
 *
 * @param value - Text to limit.
 * @param maxChars - Maximum length of the returned text.
 * @returns The (possibly shortened) text and whether anything was cut off.
 */
function truncateText(value: string, maxChars: number): { text: string; truncated: boolean } {
  if (value.length <= maxChars) return { text: value, truncated: false };

  let cut = maxChars;
  if (cut > 0) {
    const before = value.charCodeAt(cut - 1);
    const after = value.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cut -= 1;
  }
  return { text: value.slice(0, cut), truncated: true };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the hostname from a URL string.
 *
 * @param url - URL to inspect.
 * @returns The hostname, or `undefined` when `url` is empty or cannot be parsed.
 */
function resolveSiteName(url: string | undefined): string | undefined {
  let host: string | undefined;
  if (url) {
    try {
      host = new URL(url).hostname;
    } catch {
      host = undefined;
    }
  }
  return host;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a response body as text on a best-effort basis.
 *
 * @param res - Response whose body should be read.
 * @returns The body text, or an empty string when reading fails.
 */
async function readResponseText(res: Response): Promise<string> {
  let text = "";
  try {
    text = await res.text();
  } catch {
    // Deliberately ignored: callers treat an unreadable body as empty.
  }
  return text;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runs a web search against the Brave Search API, with caching.
 *
 * A cached payload for the same provider, query and count is returned (with
 * `cached: true`) without touching the network. Otherwise the Brave endpoint
 * is queried, the results are mapped to `{ title, url, description, published,
 * siteName }`, and the payload is cached for `cacheTtlMs`.
 *
 * @param params.query - Search query.
 * @param params.count - Number of results to request.
 * @param params.apiKey - Brave subscription token.
 * @param params.timeoutSeconds - Timeout for the request, including reading the body.
 * @param params.cacheTtlMs - Cache lifetime; zero or less disables caching of the result.
 * @param params.provider - Search provider; only "brave" is supported.
 * @returns The search payload.
 * @throws Error when the provider is unsupported or the API responds with a non-2xx status.
 */
async function runWebSearch(params: {
  query: string;
  count: number;
  apiKey: string;
  timeoutSeconds: number;
  cacheTtlMs: number;
  provider: (typeof SEARCH_PROVIDERS)[number];
}): Promise<Record<string, unknown>> {
  const cacheKey = normalizeCacheKey(`${params.provider}:${params.query}:${params.count}`);
  const cached = readCache(SEARCH_CACHE, cacheKey);
  if (cached) return { ...cached.value, cached: true };

  const start = Date.now();
  if (params.provider !== "brave") {
    throw new Error("Unsupported web search provider.");
  }

  const url = new URL(BRAVE_SEARCH_ENDPOINT);
  url.searchParams.set("q", params.query);
  url.searchParams.set("count", String(params.count));

  // Aborting this controller once the request is finished cancels the pending
  // timeout timer, which would otherwise stay alive for the full timeout.
  const finished = new AbortController();
  try {
    const res = await fetch(url.toString(), {
      method: "GET",
      headers: {
        Accept: "application/json",
        "X-Subscription-Token": params.apiKey,
      },
      signal: withTimeout(finished.signal, params.timeoutSeconds * 1000),
    });

    if (!res.ok) {
      const detail = await readResponseText(res);
      throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
    }

    // The body is untrusted JSON: tolerate `null` and a missing or non-array result list.
    const data = (await res.json()) as BraveSearchResponse | null;
    const rawResults = data?.web?.results;
    const results = Array.isArray(rawResults) ? rawResults : [];
    const mapped = results.map((entry) => ({
      title: entry.title ?? "",
      url: entry.url ?? "",
      description: entry.description ?? "",
      published: entry.age ?? undefined,
      siteName: resolveSiteName(entry.url),
    }));

    const payload = {
      query: params.query,
      provider: params.provider,
      count: mapped.length,
      tookMs: Date.now() - start,
      results: mapped,
    };
    writeCache(SEARCH_CACHE, cacheKey, payload, params.cacheTtlMs);
    return payload;
  } finally {
    finished.abort();
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetches a URL and extracts readable text from the response, with caching.
 *
 * A cached payload for the same URL, extract mode and character limit is
 * returned (with `cached: true`) without touching the network. HTML bodies are
 * converted according to `extractMode`, JSON bodies are pretty-printed, and
 * anything else is returned as-is. The text is then truncated to `maxChars`.
 *
 * NOTE(review): the target host is not restricted here (loopback and private
 * addresses are reachable); confirm callers rely on sandboxing or add a check.
 *
 * @param params.url - Address to fetch; must be http or https.
 * @param params.extractMode - "markdown" or "text" conversion for HTML bodies.
 * @param params.maxChars - Maximum length of the returned text.
 * @param params.timeoutSeconds - Timeout for the request, including reading the body.
 * @param params.cacheTtlMs - Cache lifetime; zero or less disables caching of the result.
 * @param params.userAgent - Value sent in the User-Agent header.
 * @returns The fetch payload (URLs, status, content type, title, text and timing).
 * @throws Error when the URL is invalid, the response status is non-2xx, or the body cannot be read.
 */
async function runWebFetch(params: {
  url: string;
  extractMode: (typeof EXTRACT_MODES)[number];
  maxChars: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  userAgent: string;
}): Promise<Record<string, unknown>> {
  // URL paths and query strings are case-sensitive, so the key must not be
  // lower-cased; otherwise /Page and /page would share one cache entry.
  const cacheKey = `fetch:${params.url}:${params.extractMode}:${params.maxChars}`;
  const cached = readCache(FETCH_CACHE, cacheKey);
  if (cached) return { ...cached.value, cached: true };

  let parsedUrl: URL;
  try {
    parsedUrl = new URL(params.url);
  } catch {
    throw new Error("Invalid URL: must be http or https");
  }
  if (!["http:", "https:"].includes(parsedUrl.protocol)) {
    throw new Error("Invalid URL: must be http or https");
  }

  const start = Date.now();
  // Aborting this controller once the request is finished cancels the pending
  // timeout timer, which would otherwise stay alive for the full timeout.
  const finished = new AbortController();
  try {
    const res = await fetch(parsedUrl.toString(), {
      method: "GET",
      headers: {
        Accept: "*/*",
        "User-Agent": params.userAgent,
      },
      signal: withTimeout(finished.signal, params.timeoutSeconds * 1000),
    });

    if (!res.ok) {
      const detail = await readResponseText(res);
      throw new Error(`Web fetch failed (${res.status}): ${detail || res.statusText}`);
    }

    const contentType = res.headers.get("content-type") ?? "application/octet-stream";
    // Media types are case-insensitive; compare against a lower-cased copy.
    const mediaType = contentType.toLowerCase();
    // Read directly so a failed or timed-out body read surfaces as an error
    // instead of being cached as an empty, apparently successful page.
    const body = await res.text();

    let title: string | undefined;
    let text = body;
    if (mediaType.includes("text/html")) {
      const parsed = params.extractMode === "text" ? htmlToText(body) : htmlToMarkdown(body);
      text = parsed.text;
      title = parsed.title;
    } else if (mediaType.includes("application/json")) {
      try {
        text = JSON.stringify(JSON.parse(body), null, 2);
      } catch {
        // Not valid JSON despite the header; return the raw body.
        text = body;
      }
    }

    const truncated = truncateText(text, params.maxChars);
    const payload = {
      url: params.url,
      finalUrl: res.url || params.url,
      status: res.status,
      contentType,
      title,
      extractMode: params.extractMode,
      truncated: truncated.truncated,
      length: truncated.text.length,
      fetchedAt: new Date().toISOString(),
      tookMs: Date.now() - start,
      text: truncated.text,
    };
    writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
    return payload;
  } finally {
    finished.abort();
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the `web_search` agent tool.
 *
 * @param options.config - Configuration from which the search settings are read.
 * @param options.sandboxed - Whether the session is sandboxed.
 * @returns The tool, or `null` when search is disabled or no API key is available.
 */
export function createWebSearchTool(options?: {
  config?: ClawdbotConfig;
  sandboxed?: boolean;
}): AnyAgentTool | null {
  const searchConfig = resolveSearchConfig(options?.config);
  const enabled = resolveSearchEnabled({ search: searchConfig, sandboxed: options?.sandboxed });
  if (!enabled) return null;

  const apiKey = resolveSearchApiKey(searchConfig);
  if (!apiKey) return null;

  return {
    label: "Web Search",
    name: "web_search",
    description:
      "Search the web using Brave Search API. Returns titles, URLs, and snippets for fast research.",
    parameters: WebSearchSchema,
    execute: async (_toolCallId, args) => {
      const input = args as Record<string, unknown>;
      const query = readStringParam(input, "query", { required: true });
      // A count from the tool call overrides the configured maxResults.
      const requestedCount =
        readNumberParam(input, "count", { integer: true }) ?? searchConfig?.maxResults;
      const payload = await runWebSearch({
        query,
        count: resolveSearchCount(requestedCount, DEFAULT_SEARCH_COUNT),
        apiKey,
        timeoutSeconds: resolveTimeoutSeconds(searchConfig?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
        cacheTtlMs: resolveCacheTtlMs(searchConfig?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
        provider: resolveSearchProvider(searchConfig),
      });
      return jsonResult(payload);
    },
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the `web_fetch` agent tool.
 *
 * @param options.config - Configuration from which the fetch settings are read.
 * @param options.sandboxed - Whether the session is sandboxed.
 * @returns The tool, or `null` when fetching is disabled.
 */
export function createWebFetchTool(options?: {
  config?: ClawdbotConfig;
  sandboxed?: boolean;
}): AnyAgentTool | null {
  const fetchConfig = resolveFetchConfig(options?.config);
  const enabled = resolveFetchEnabled({ fetch: fetchConfig, sandboxed: options?.sandboxed });
  if (!enabled) return null;

  // A non-empty configured user agent replaces the versioned default.
  let userAgent = `clawdbot/${VERSION}`;
  if (
    fetchConfig &&
    "userAgent" in fetchConfig &&
    typeof fetchConfig.userAgent === "string" &&
    fetchConfig.userAgent
  ) {
    userAgent = fetchConfig.userAgent;
  }

  return {
    label: "Web Fetch",
    name: "web_fetch",
    description:
      "Fetch and extract readable content from a URL (HTML → markdown/text). Use for lightweight page access without browser automation.",
    parameters: WebFetchSchema,
    execute: async (_toolCallId, args) => {
      const input = args as Record<string, unknown>;
      const url = readStringParam(input, "url", { required: true });
      // Anything other than "text" selects markdown.
      const requestedMode = readStringParam(input, "extractMode");
      const extractMode = requestedMode === "text" ? "text" : "markdown";
      const requestedMaxChars = readNumberParam(input, "maxChars", { integer: true });
      const payload = await runWebFetch({
        url,
        extractMode,
        maxChars: resolveMaxChars(requestedMaxChars ?? fetchConfig?.maxChars, DEFAULT_FETCH_MAX_CHARS),
        timeoutSeconds: resolveTimeoutSeconds(fetchConfig?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
        cacheTtlMs: resolveCacheTtlMs(fetchConfig?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
        userAgent,
      });
      return jsonResult(payload);
    },
  };
}
|