diff --git a/docs/brave-search.md b/docs/brave-search.md new file mode 100644 index 000000000..a29143fb4 --- /dev/null +++ b/docs/brave-search.md @@ -0,0 +1,40 @@ +--- +summary: "Brave Search API setup for web_search" +read_when: + - You want to use Brave Search for web_search + - You need a BRAVE_API_KEY or plan details +--- + +# Brave Search API + +Clawdbot uses Brave Search as the default provider for `web_search`. + +## Get an API key + +1) Create a Brave Search API account at https://brave.com/search/api/ +2) In the dashboard, choose the **Data for Search** plan and generate an API key. +3) Store the key in config (recommended) or set `BRAVE_API_KEY` in the Gateway environment. + +## Config example + +```json5 +{ + tools: { + web: { + search: { + provider: "brave", + apiKey: "BRAVE_API_KEY_HERE", + maxResults: 5, + timeoutSeconds: 30 + } + } + } +} +``` + +## Notes + +- The Data for AI plan is **not** compatible with `web_search`. +- Brave provides a free tier plus paid plans; check the Brave API portal for current limits. + +See [Web tools](/tools/web) for the full web_search configuration. diff --git a/docs/perplexity.md b/docs/perplexity.md new file mode 100644 index 000000000..829c2f25f --- /dev/null +++ b/docs/perplexity.md @@ -0,0 +1,76 @@ +--- +summary: "Perplexity Sonar setup for web_search" +read_when: + - You want to use Perplexity Sonar for web search + - You need PERPLEXITY_API_KEY or OpenRouter setup +--- + +# Perplexity Sonar + +Clawdbot can use Perplexity Sonar for the `web_search` tool. You can connect +through Perplexity’s direct API or via OpenRouter. + +## API options + +### Perplexity (direct) + +- Base URL: https://api.perplexity.ai +- Environment variable: `PERPLEXITY_API_KEY` + +### OpenRouter (alternative) + +- Base URL: https://openrouter.ai/api/v1 +- Environment variable: `OPENROUTER_API_KEY` +- Supports prepaid/crypto credits. 
+ +## Config example + +```json5 +{ + tools: { + web: { + search: { + provider: "perplexity", + perplexity: { + apiKey: "pplx-...", + baseUrl: "https://api.perplexity.ai", + model: "perplexity/sonar-pro" + } + } + } + } +} +``` + +## Switching from Brave + +```json5 +{ + tools: { + web: { + search: { + provider: "perplexity", + perplexity: { + apiKey: "pplx-...", + baseUrl: "https://api.perplexity.ai" + } + } + } + } +} +``` + +If both `PERPLEXITY_API_KEY` and `OPENROUTER_API_KEY` are set, set +`tools.web.search.perplexity.baseUrl` (or `tools.web.search.perplexity.apiKey`) +to disambiguate. + +If `PERPLEXITY_API_KEY` is used from the environment and no base URL is set, +Clawdbot defaults to the direct Perplexity endpoint. Set `baseUrl` to override. + +## Models + +- `perplexity/sonar` — fast Q&A with web search +- `perplexity/sonar-pro` (default) — multi-step reasoning + web search +- `perplexity/sonar-reasoning-pro` — deep research + +See [Web tools](/tools/web) for the full web_search configuration. diff --git a/docs/tools/web.md b/docs/tools/web.md index f36f0f0b4..3780538a5 100644 --- a/docs/tools/web.md +++ b/docs/tools/web.md @@ -1,5 +1,5 @@ --- -summary: "Web search + fetch tools (Brave Search API, Perplexity via OpenRouter)" +summary: "Web search + fetch tools (Brave Search API, Perplexity direct/OpenRouter)" read_when: - You want to enable web_search or web_fetch - You need Brave Search API key setup @@ -33,6 +33,8 @@ These are **not** browser automation. For JS-heavy sites or logins, use the | **Brave** (default) | Fast, structured results, free tier | Traditional search results | `BRAVE_API_KEY` | | **Perplexity** | AI-synthesized answers, citations, real-time | Requires OpenRouter credits | `OPENROUTER_API_KEY` or `PERPLEXITY_API_KEY` | +See [Brave Search setup](/brave-search) and [Perplexity Sonar](/perplexity) for provider-specific details. 
+ Set the provider in config: ```json5 @@ -47,6 +49,25 @@ Set the provider in config: } ``` +Example: switch to Perplexity Sonar (direct API): + +```json5 +{ + tools: { + web: { + search: { + provider: "perplexity", + perplexity: { + apiKey: "pplx-...", + baseUrl: "https://api.perplexity.ai", + model: "perplexity/sonar-pro" + } + } + } + } +} +``` + ## Getting a Brave API key 1) Create a Brave Search API account at https://brave.com/search/api/ @@ -65,7 +86,7 @@ current limits and pricing. environment. For a daemon install, put it in `~/.clawdbot/.env` (or your service environment). See [Env vars](/start/faq#how-does-clawdbot-load-environment-variables). -## Using Perplexity (via OpenRouter) +## Using Perplexity (direct or via OpenRouter) Perplexity Sonar models have built-in web search capabilities and return AI-synthesized answers with citations. You can use them via OpenRouter (no credit card required - supports @@ -103,6 +124,9 @@ crypto/prepaid). **Environment alternative:** set `OPENROUTER_API_KEY` or `PERPLEXITY_API_KEY` in the Gateway environment. For a daemon install, put it in `~/.clawdbot/.env`. +If `PERPLEXITY_API_KEY` is used from the environment and no base URL is set, +Clawdbot defaults to the direct Perplexity endpoint (`https://api.perplexity.ai`). 
export type ExtractMode = "markdown" | "text";

/**
 * Named HTML entities understood by {@link decodeEntities}, keyed by
 * lower-cased entity name. A Map (rather than an object literal) is used so
 * that names such as `constructor` can never resolve to prototype members.
 */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["quot", '"'],
  ["lt", "<"],
  ["gt", ">"],
]);

/**
 * Converts a numeric character reference to its string form.
 *
 * @param codePoint - Parsed value of the reference.
 * @param fallback - Text returned when `codePoint` is outside the Unicode
 *   range (`String.fromCodePoint` would throw a RangeError for it).
 * @returns The decoded character, or `fallback` for an invalid code point.
 */
function codePointToString(codePoint: number, fallback: string): string {
  const isValid = Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
  return isValid ? String.fromCodePoint(codePoint) : fallback;
}

/**
 * Decodes the HTML entities this module supports: `&nbsp;`, `&amp;`,
 * `&quot;`, `&lt;`, `&gt;` (case-insensitive) plus hexadecimal (`&#x41;`) and
 * decimal (`&#39;`) character references.
 *
 * Decoding is done in a single pass, so already-decoded output is never
 * re-scanned: `&amp;lt;` yields the text `&lt;`, not `<`. Only sequences that
 * start with `&` and end with `;` are touched; unknown names and out-of-range
 * code points are left exactly as written.
 *
 * @param value - Text that may contain HTML entities.
 * @returns `value` with every recognised entity replaced by its character.
 */
function decodeEntities(value: string): string {
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi,
    (match: string, hex?: string, dec?: string, name?: string): string => {
      if (hex !== undefined) {
        return codePointToString(Number.parseInt(hex, 16), match);
      }
      if (dec !== undefined) {
        return codePointToString(Number.parseInt(dec, 10), match);
      }
      return NAMED_ENTITIES.get((name ?? "").toLowerCase()) ?? match;
    },
  );
}

/**
 * Removes every `<...>` tag from `value` and then decodes HTML entities in
 * what remains.
 *
 * Entities are decoded after the tags are stripped, so an escaped tag such as
 * `&lt;b&gt;` survives as the literal text `<b>` in the result.
 *
 * @param value - HTML fragment.
 * @returns The fragment's text with tags removed and entities decoded.
 */
function stripTags(value: string): string {
  return decodeEntities(value.replace(/<[^>]+>/g, ""));
}

/**
 * Tidies whitespace in extracted text.
 *
 * @param value - Text to clean up.
 * @returns `value` with carriage returns removed, trailing spaces/tabs before
 *   a newline dropped, runs of three or more newlines reduced to two, runs of
 *   two or more spaces/tabs reduced to one space, and both ends trimmed.
 */
function normalizeWhitespace(value: string): string {
  return value
    .replace(/\r/g, "")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .replace(/[ \t]{2,}/g, " ")
    .trim();
}