feat: add web tools

This commit is contained in:
Peter Steinberger
2026-01-15 04:07:29 +00:00
parent 31d3aef8d6
commit f275cc180b
18 changed files with 736 additions and 165 deletions

View File

@@ -1,30 +0,0 @@
---
name: brave-search
description: Web search and content extraction via Brave Search API.
homepage: https://brave.com/search/api
metadata: {"clawdbot":{"emoji":"🦁","requires":{"bins":["node"],"env":["BRAVE_API_KEY"]},"primaryEnv":"BRAVE_API_KEY"}}
---
# Brave Search
Headless web search (and lightweight content extraction) using the Brave Search API. No browser required.
## Search
```bash
node {baseDir}/scripts/search.mjs "query"
node {baseDir}/scripts/search.mjs "query" -n 10
node {baseDir}/scripts/search.mjs "query" --content
node {baseDir}/scripts/search.mjs "query" -n 3 --content
```
## Extract a page
```bash
node {baseDir}/scripts/content.mjs "https://example.com/article"
```
Notes:
- Needs `BRAVE_API_KEY`.
- Content extraction is best-effort (good for articles; not for app-like sites).
- If a site is blocked or too JS-heavy, prefer the `summarize` skill (it can use a Firecrawl fallback).

View File

@@ -1,53 +0,0 @@
#!/usr/bin/env node
/**
 * Write the command synopsis to stderr and terminate the process
 * with exit status 2.
 */
function usage() {
  const synopsis = "Usage: content.mjs <url>";
  console.error(synopsis);
  process.exit(2);
}
/**
 * Fetch a page and reduce its HTML to a plain-text bullet list.
 *
 * The extraction is regex-based and best-effort: script, style, nav, footer
 * and header elements are dropped, remaining tags are stripped, a handful of
 * common entities are decoded, and the text is split into paragraphs.
 *
 * @param {string} url - Address handed to `fetch`.
 * @returns {Promise<string>} On a successful response, up to 30 paragraphs,
 *   one `- ` bullet per paragraph, ending in a newline. On a non-OK response,
 *   a `> Fetch failed (<status>).` note followed by the first 2000 characters
 *   of the cleaned body.
 */
export async function fetchAsMarkdown(url) {
  const MAX_PARAGRAPHS = 30;
  const MAX_ERROR_CHARS = 2000;

  const resp = await fetch(url, {
    headers: { "User-Agent": "clawdbot-brave-search/1.0" },
  });
  const html = await resp.text();

  // Very lightweight "readability-ish" extraction without dependencies:
  // - drop script/style/nav/footer/header
  // - strip tags
  // - keep paragraphs
  const cleaned = html
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<(nav|footer|header)[\s\S]*?<\/\1>/gi, " ")
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n\n")
    .replace(/<\/div>/gi, "\n")
    .replace(/<[^>]+>/g, " ")
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    // Decode `&amp;` last so escaped entities such as `&amp;lt;` become the
    // literal text `&lt;` instead of being decoded a second time.
    .replace(/&amp;/g, "&")
    // Strip trailing whitespace on each line WITHOUT consuming newlines;
    // matching `\s` here would swallow the blank lines that separate
    // paragraphs and leave nothing for the split below.
    .replace(/[^\S\n]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .replace(/[ \t]{2,}/g, " ")
    .trim();

  if (!resp.ok) {
    return `> Fetch failed (${resp.status}).\n\n${cleaned.slice(0, MAX_ERROR_CHARS)}\n`;
  }

  const paras = cleaned
    .split("\n\n")
    .map((p) => p.trim())
    .filter(Boolean)
    .slice(0, MAX_PARAGRAPHS);
  return paras.map((p) => `- ${p}`).join("\n") + "\n";
}
// Command-line entry point.
//
// This module is also loaded with `import("./content.mjs")` to reuse
// fetchAsMarkdown. Without a guard, that import would execute the CLI below
// against the importing script's arguments and try to fetch its first
// argument as a URL. Run the CLI only when this file is the program entry.
const { realpathSync } = await import("node:fs");
const { pathToFileURL } = await import("node:url");

/** Report whether this module is the script Node was started with. */
function isEntryPoint() {
  const scriptPath = process.argv[1];
  if (!scriptPath) return false;
  let resolved = scriptPath;
  try {
    // Resolve symlinks so the path can match import.meta.url.
    resolved = realpathSync(scriptPath);
  } catch {
    // Fall back to comparing the path exactly as it was given.
  }
  return import.meta.url === pathToFileURL(resolved).href;
}

if (isEntryPoint()) {
  const args = process.argv.slice(2);
  if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();
  const url = args[0];
  process.stdout.write(await fetchAsMarkdown(url));
}

View File

@@ -1,79 +0,0 @@
#!/usr/bin/env node
/**
 * Write the command synopsis to stderr and terminate the process
 * with exit status 2.
 */
function usage() {
  const synopsis = 'Usage: search.mjs "query" [-n 5] [--content]';
  console.error(synopsis);
  process.exit(2);
}
// ---- Argument parsing ----
// Usage: search.mjs "query" [-n <count>] [--content]
const args = process.argv.slice(2);
if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();

// Result-count bounds. The same clamped value is used for the request's
// `count` parameter and for trimming the response, so both always agree.
// NOTE(review): 20 is presumably the API's maximum `count` — confirm against
// the Brave Search API documentation.
const DEFAULT_RESULTS = 5;
const MIN_RESULTS = 1;
const MAX_RESULTS = 20;

const query = args[0];
let n = DEFAULT_RESULTS;
let withContent = false;
for (let i = 1; i < args.length; i++) {
  const a = args[i];
  if (a === "-n") {
    // A trailing `-n` with no value keeps the default.
    const requested = Number.parseInt(args[i + 1] ?? String(DEFAULT_RESULTS), 10);
    if (Number.isNaN(requested)) {
      // Reject non-numeric values instead of sending `count=NaN`.
      console.error(`Invalid value for -n: ${args[i + 1]}`);
      usage();
    }
    n = Math.max(MIN_RESULTS, Math.min(requested, MAX_RESULTS));
    i++;
    continue;
  }
  if (a === "--content") {
    withContent = true;
    continue;
  }
  console.error(`Unknown arg: ${a}`);
  usage();
}

const apiKey = (process.env.BRAVE_API_KEY ?? "").trim();
if (!apiKey) {
  console.error("Missing BRAVE_API_KEY");
  process.exit(1);
}

// ---- Search request ----
const endpoint = new URL("https://api.search.brave.com/res/v1/web/search");
endpoint.searchParams.set("q", query);
endpoint.searchParams.set("count", String(n));
endpoint.searchParams.set("text_decorations", "false");
endpoint.searchParams.set("safesearch", "moderate");

const resp = await fetch(endpoint, {
  headers: {
    Accept: "application/json",
    "X-Subscription-Token": apiKey,
  },
});
if (!resp.ok) {
  // The body is only extra context for the error; ignore failures reading it.
  const text = await resp.text().catch(() => "");
  throw new Error(`Brave Search failed (${resp.status}): ${text}`);
}
const data = await resp.json();
const results = (data?.web?.results ?? []).slice(0, n);

// ---- Result list: title, URL and optional description per entry ----
const lines = [];
for (const r of results) {
  const title = String(r?.title ?? "").trim();
  const url = String(r?.url ?? "").trim();
  const desc = String(r?.description ?? "").trim();
  if (!title || !url) continue;
  lines.push(`- ${title}\n ${url}${desc ? `\n ${desc}` : ""}`);
}
process.stdout.write(lines.join("\n\n") + "\n");

// ---- Optional page content for each result ----
// No process.exit() after writing: stdout writes can be asynchronous, and
// exiting immediately may drop output that has not been flushed yet. Letting
// the script run to completion exits with status 0 once output is drained.
if (withContent) {
  process.stdout.write("\n---\n\n");
  // Load the extractor once rather than on every iteration.
  const { fetchAsMarkdown } = await import("./content.mjs");
  for (const r of results) {
    const title = String(r?.title ?? "").trim();
    const url = String(r?.url ?? "").trim();
    if (!url) continue;
    process.stdout.write(`# ${title || url}\n${url}\n\n`);
    const text = await fetchAsMarkdown(url);
    process.stdout.write(text.trimEnd() + "\n\n");
  }
}