feat: enhance web_fetch fallbacks

This commit is contained in:
Peter Steinberger
2026-01-17 00:00:15 +00:00
parent a84000c6d9
commit c54c665f97
11 changed files with 802 additions and 27 deletions

View File

@@ -0,0 +1,60 @@
import { createWebFetchTool } from "../src/agents/tools/web-tools.js";
/** Pages fetched when no URLs are given on the command line. */
const DEFAULT_URLS = [
  "https://example.com/",
  "https://news.ycombinator.com/",
  "https://www.reddit.com/r/javascript/",
  "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent",
  "https://httpbin.org/html",
];

// Everything after the first two argv entries is treated as a URL list;
// an empty list falls back to the defaults above.
const urls = process.argv.slice(2);
const targets = urls.length === 0 ? DEFAULT_URLS : urls;
/**
 * Fetches `url` with a freshly created web_fetch tool and returns the
 * `details` field of the tool result.
 *
 * The tool is built with `cacheTtlMinutes: 0`, Firecrawl disabled and
 * `sandboxed: false`, and is asked for `extractMode: "markdown"`.
 *
 * @param url - Page to fetch.
 * @param readability - Forwarded to the tool config; a falsy value is rejected up front.
 * @returns The result's `details`, viewed as a bag of optional
 *   `text` / `title` / `extractor` / `length` / `truncated` fields.
 * @throws Error when `readability` is falsy or when no tool is returned.
 */
async function runFetch(url: string, readability: boolean) {
  type FetchDetails = {
    text?: string;
    title?: string;
    extractor?: string;
    length?: number;
    truncated?: boolean;
  };

  if (!readability) {
    throw new Error("Basic extraction removed. Set readability=true or enable Firecrawl.");
  }

  const fetchSettings = { readability, cacheTtlMinutes: 0, firecrawl: { enabled: false } };
  const tool = createWebFetchTool({
    config: { tools: { web: { fetch: fetchSettings } } },
    sandboxed: false,
  });
  if (!tool) {
    throw new Error("web_fetch tool is disabled");
  }

  const { details } = await tool.execute("test", { url, extractMode: "markdown" });
  return details as FetchDetails;
}
/**
 * Shortens `value` for single-line log output.
 *
 * @param value - Text to shorten; an empty string yields "".
 * @param max - Number of UTF-16 code units of `value` to keep (default 160).
 * @returns `value` unchanged when it is at most `max` long, otherwise its
 *   first `max` code units followed by an ellipsis character (U+2026).
 */
function truncate(value: string, max = 160): string {
  if (!value) return "";
  if (value.length <= max) return value;
  // Mark the cut so a clipped string is distinguishable from a complete one.
  return `${value.slice(0, max)}\u2026`;
}
/**
 * Fetches every entry of `targets` one after another and prints, per URL,
 * a header line, a summary line (extractor, length, shortened title) and,
 * when text was returned, a shortened sample of it.
 *
 * A rejected fetch is not caught here, so it stops the loop and rejects
 * the returned promise.
 */
async function run() {
  for (const url of targets) {
    console.log(`\n=== ${url}`);

    const { extractor, length, title, text } = await runFetch(url, true);

    const extractorLabel = extractor ?? "unknown";
    const reportedLength = length ?? 0;
    const shortTitle = truncate(title ?? "", 80);
    console.log(`readability: ${extractorLabel} len=${reportedLength} title=${shortTitle}`);

    if (text) {
      console.log(`readability sample: ${truncate(text)}`);
    }
  }
}
// Script entry point: print any failure from run() and exit with status 1.
run().catch((failure) => {
  console.error(failure);
  process.exit(1);
});