Web: trim HTML error bodies in web_fetch (#1193)
* Web: trim HTML error bodies in web_fetch * fix: trim web_fetch HTML error bodies (#1193) (thanks @sebslight) --------- Co-authored-by: Sebastian Slight <sbarrios93@gmail.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -49,6 +49,20 @@ function firecrawlError(): MockResponse {
|
||||
};
|
||||
}
|
||||
|
||||
function errorHtmlResponse(
|
||||
html: string,
|
||||
status = 404,
|
||||
url = "https://example.com/",
|
||||
contentType: string | null = "text/html; charset=utf-8",
|
||||
): MockResponse {
|
||||
return {
|
||||
ok: false,
|
||||
status,
|
||||
url,
|
||||
headers: contentType ? makeHeaders({ "content-type": contentType }) : makeHeaders({}),
|
||||
text: async () => html,
|
||||
};
|
||||
}
|
||||
function requestUrl(input: RequestInfo): string {
|
||||
if (typeof input === "string") return input;
|
||||
if (input instanceof URL) return input.toString();
|
||||
@@ -182,4 +196,64 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
expect(details.extractor).toBe("firecrawl");
|
||||
expect(details.text).toContain("firecrawl fallback");
|
||||
});
|
||||
it("strips and truncates HTML from error responses", async () => {
|
||||
const long = "x".repeat(12_000);
|
||||
const html =
|
||||
"<!doctype html><html><head><title>Not Found</title></head><body><h1>Not Found</h1><p>" +
|
||||
long +
|
||||
"</p></body></html>";
|
||||
const mockFetch = vi.fn((input: RequestInfo) =>
|
||||
Promise.resolve(errorHtmlResponse(html, 404, requestUrl(input), "Text/HTML; charset=utf-8")),
|
||||
);
|
||||
// @ts-expect-error mock fetch
|
||||
global.fetch = mockFetch;
|
||||
|
||||
const tool = createWebFetchTool({
|
||||
config: {
|
||||
tools: {
|
||||
web: {
|
||||
fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false } },
|
||||
},
|
||||
},
|
||||
},
|
||||
sandboxed: false,
|
||||
});
|
||||
|
||||
let message = "";
|
||||
try {
|
||||
await tool?.execute?.("call", { url: "https://example.com/missing" });
|
||||
} catch (error) {
|
||||
message = (error as Error).message;
|
||||
}
|
||||
|
||||
expect(message).toContain("Web fetch failed (404):");
|
||||
expect(message).toContain("Not Found");
|
||||
expect(message).not.toContain("<html");
|
||||
expect(message.length).toBeLessThan(5_000);
|
||||
});
|
||||
|
||||
it("strips HTML errors when content-type is missing", async () => {
|
||||
const html =
|
||||
"<!DOCTYPE HTML><html><head><title>Oops</title></head><body><h1>Oops</h1></body></html>";
|
||||
const mockFetch = vi.fn((input: RequestInfo) =>
|
||||
Promise.resolve(errorHtmlResponse(html, 500, requestUrl(input), null)),
|
||||
);
|
||||
// @ts-expect-error mock fetch
|
||||
global.fetch = mockFetch;
|
||||
|
||||
const tool = createWebFetchTool({
|
||||
config: {
|
||||
tools: {
|
||||
web: {
|
||||
fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false } },
|
||||
},
|
||||
},
|
||||
},
|
||||
sandboxed: false,
|
||||
});
|
||||
|
||||
await expect(tool?.execute?.("call", { url: "https://example.com/oops" })).rejects.toThrow(
|
||||
/Web fetch failed \(500\):.*Oops/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user