diff --git a/src/agents/tools/web-fetch-utils.ts b/src/agents/tools/web-fetch-utils.ts index a5e7e0490..cf40d8b6d 100644 --- a/src/agents/tools/web-fetch-utils.ts +++ b/src/agents/tools/web-fetch-utils.ts @@ -81,6 +81,14 @@ export async function extractReadableContent(params: { url: string; extractMode: ExtractMode; }): Promise<{ text: string; title?: string } | null> { + const fallback = (): { text: string; title?: string } => { + const rendered = htmlToMarkdown(params.html); + if (params.extractMode === "text") { + const text = markdownToText(rendered.text) || normalizeWhitespace(stripTags(params.html)); + return { text, title: rendered.title }; + } + return rendered; + }; try { const [{ Readability }, { parseHTML }] = await Promise.all([ import("@mozilla/readability"), @@ -94,15 +102,15 @@ export async function extractReadableContent(params: { } const reader = new Readability(document, { charThreshold: 0 }); const parsed = reader.parse(); - if (!parsed?.content) return null; + if (!parsed?.content) return fallback(); const title = parsed.title || undefined; if (params.extractMode === "text") { const text = normalizeWhitespace(parsed.textContent ?? ""); - return { text, title }; + return text ? { text, title } : fallback(); } const rendered = htmlToMarkdown(parsed.content); return { text: rendered.text, title: title ?? rendered.title }; } catch { - return null; + return fallback(); } }