diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cf936b28..63524b033 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Docs: https://docs.clawd.bot ### Fixes - Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context. +- Web: trim HTML error bodies in web_fetch failures. (#1193) — thanks @sebslight. ## 2026.1.18-5 diff --git a/src/agents/tools/web-fetch-utils.ts b/src/agents/tools/web-fetch-utils.ts index 33a420703..a5e7e0490 100644 --- a/src/agents/tools/web-fetch-utils.ts +++ b/src/agents/tools/web-fetch-utils.ts @@ -25,7 +25,7 @@ function normalizeWhitespace(value: string): string { .trim(); } -function htmlToMarkdown(html: string): { text: string; title?: string } { +export function htmlToMarkdown(html: string): { text: string; title?: string } { const titleMatch = html.match(/]*>([\s\S]*?)<\/title>/i); const title = titleMatch ? normalizeWhitespace(stripTags(titleMatch[1])) : undefined; let text = html diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index 721068385..be766ed9d 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -18,6 +18,7 @@ import { } from "./web-shared.js"; import { extractReadableContent, + htmlToMarkdown, markdownToText, truncateText, type ExtractMode, @@ -28,6 +29,7 @@ export { extractReadableContent } from "./web-fetch-utils.js"; const EXTRACT_MODES = ["markdown", "text"] as const; const DEFAULT_FETCH_MAX_CHARS = 50_000; +const DEFAULT_ERROR_MAX_CHARS = 4_000; const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev"; const DEFAULT_FIRECRAWL_MAX_AGE_MS = 172_800_000; const DEFAULT_FETCH_USER_AGENT = @@ -142,6 +144,30 @@ function resolveMaxChars(value: unknown, fallback: number): number { return Math.max(100, Math.floor(parsed)); } +function looksLikeHtml(value: string): boolean { + const trimmed = value.trimStart(); + if (!trimmed) return false; + const head = trimmed.slice(0, 256).toLowerCase(); + return head.startsWith(" html, + }; +} function requestUrl(input: RequestInfo): string { if (typeof input === "string") return input; if (input instanceof URL) return input.toString(); @@ -182,4 +196,64 @@ describe("web_fetch extraction fallbacks", () => { expect(details.extractor).toBe("firecrawl"); expect(details.text).toContain("firecrawl fallback"); }); + it("strips and truncates HTML from error responses", async () => { + const long = "x".repeat(12_000); + const html = + "Not Found

Not Found

" + + long + + "

"; + const mockFetch = vi.fn((input: RequestInfo) => + Promise.resolve(errorHtmlResponse(html, 404, requestUrl(input), "Text/HTML; charset=utf-8")), + ); + // @ts-expect-error mock fetch + global.fetch = mockFetch; + + const tool = createWebFetchTool({ + config: { + tools: { + web: { + fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false } }, + }, + }, + }, + sandboxed: false, + }); + + let message = ""; + try { + await tool?.execute?.("call", { url: "https://example.com/missing" }); + } catch (error) { + message = (error as Error).message; + } + + expect(message).toContain("Web fetch failed (404):"); + expect(message).toContain("Not Found"); + expect(message).not.toContain(" { + const html = + "Oops

Oops

"; + const mockFetch = vi.fn((input: RequestInfo) => + Promise.resolve(errorHtmlResponse(html, 500, requestUrl(input), null)), + ); + // @ts-expect-error mock fetch + global.fetch = mockFetch; + + const tool = createWebFetchTool({ + config: { + tools: { + web: { + fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false } }, + }, + }, + }, + sandboxed: false, + }); + + await expect(tool?.execute?.("call", { url: "https://example.com/oops" })).rejects.toThrow( + /Web fetch failed \(500\):.*Oops/, + ); + }); }); diff --git a/src/gateway/server.cron.test.ts b/src/gateway/server.cron.test.ts index 3cad8ac1d..98ed67502 100644 --- a/src/gateway/server.cron.test.ts +++ b/src/gateway/server.cron.test.ts @@ -95,7 +95,7 @@ describe("gateway server cron", () => { const jobId = typeof jobIdValue === "string" ? jobIdValue : ""; expect(jobId.length > 0).toBe(true); - const runRes = await rpcReq(ws, "cron.run", { id: jobId, mode: "force" }); + const runRes = await rpcReq(ws, "cron.run", { id: jobId, mode: "force" }, 20_000); expect(runRes.ok).toBe(true); const events = await waitForSystemEvent(); @@ -279,7 +279,8 @@ describe("gateway server cron", () => { const jobId = typeof jobIdValue === "string" ? jobIdValue : ""; expect(jobId.length > 0).toBe(true); - const runRes = await rpcReq(ws, "cron.run", { id: jobId, mode: "force" }); + // Full-suite runs can starve the event loop; give cron.run extra time to respond. + const runRes = await rpcReq(ws, "cron.run", { id: jobId, mode: "force" }, 20_000); expect(runRes.ok).toBe(true); const logPath = path.join(dir, "cron", "runs", `${jobId}.jsonl`); @@ -375,7 +376,7 @@ describe("gateway server cron", () => { expect(last.jobId).toBe(jobId); expect(last.summary).toBe("hello"); - const runsRes = await rpcReq(ws, "cron.runs", { id: jobId, limit: 20 }); + const runsRes = await rpcReq(ws, "cron.runs", { id: jobId, limit: 20 }, 20_000); expect(runsRes.ok).toBe(true); const entries = (runsRes.payload as { entries?: unknown } | null)?.entries; expect(Array.isArray(entries)).toBe(true); diff --git a/src/gateway/test-helpers.server.ts b/src/gateway/test-helpers.server.ts index 0a88a25e6..796222af8 100644 --- a/src/gateway/test-helpers.server.ts +++ b/src/gateway/test-helpers.server.ts @@ -291,7 +291,12 @@ export async function connectOk(ws: WebSocket, opts?: Parameters(ws: WebSocket, method: string, params?: unknown) { +export async function rpcReq( + ws: WebSocket, + method: string, + params?: unknown, + timeoutMs?: number, +) { const { randomUUID } = await import("node:crypto"); const id = randomUUID(); ws.send(JSON.stringify({ type: "req", id, method, params })); @@ -301,11 +306,15 @@ export async function rpcReq(ws: WebSocket, method: string, params? ok: boolean; payload?: T; error?: { message?: string; code?: string }; - }>(ws, (o) => { - if (!o || typeof o !== "object" || Array.isArray(o)) return false; - const rec = o as Record; - return rec.type === "res" && rec.id === id; - }); + }>( + ws, + (o) => { + if (!o || typeof o !== "object" || Array.isArray(o)) return false; + const rec = o as Record; + return rec.type === "res" && rec.id === id; + }, + timeoutMs, + ); } export async function waitForSystemEvent(timeoutMs = 2000) {