diff --git a/src/auto-reply/reply/get-reply.ts b/src/auto-reply/reply/get-reply.ts index 20887c340..f6259d738 100644 --- a/src/auto-reply/reply/get-reply.ts +++ b/src/auto-reply/reply/get-reply.ts @@ -12,6 +12,7 @@ import { resolveCommandAuthorization } from "../command-auth.js"; import type { MsgContext } from "../templating.js"; import { SILENT_REPLY_TOKEN } from "../tokens.js"; import { applyMediaUnderstanding } from "../../media-understanding/apply.js"; +import { applyLinkUnderstanding } from "../../link-understanding/apply.js"; import type { GetReplyOptions, ReplyPayload } from "../types.js"; import { resolveDefaultModel } from "./directive-handling.js"; import { resolveReplyDirectives } from "./get-reply-directives.js"; @@ -89,6 +90,10 @@ export async function getReplyFromConfig( agentDir, activeModel: { provider, model }, }); + await applyLinkUnderstanding({ + ctx: finalized, + cfg, + }); } const commandAuthorized = finalized.CommandAuthorized; diff --git a/src/auto-reply/templating.ts b/src/auto-reply/templating.ts index e9cd6d229..dd424ee71 100644 --- a/src/auto-reply/templating.ts +++ b/src/auto-reply/templating.ts @@ -71,6 +71,7 @@ export type MsgContext = { Transcript?: string; MediaUnderstanding?: MediaUnderstandingOutput[]; MediaUnderstandingDecisions?: MediaUnderstandingDecision[]; + LinkUnderstanding?: string[]; Prompt?: string; MaxChars?: number; ChatType?: string; diff --git a/src/config/schema.ts b/src/config/schema.ts index d7ad28b5c..d61b5964e 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -158,6 +158,11 @@ const FIELD_LABELS: Record = { "tools.media.video.attachments": "Video Understanding Attachment Policy", "tools.media.video.models": "Video Understanding Models", "tools.media.video.scope": "Video Understanding Scope", + "tools.links.enabled": "Enable Link Understanding", + "tools.links.maxLinks": "Link Understanding Max Links", + "tools.links.timeoutSeconds": "Link Understanding Timeout (sec)", + "tools.links.models": "Link 
/**
 * One link-understanding backend entry.
 * Entries are listed in `LinkToolsConfig.models` and tried in order.
 */
export type LinkModelConfig = {
  /** CLI binary (required when type=cli). */
  command: string;
  /** Use a CLI command for link processing. */
  type?: "cli";
  /** CLI args (template-enabled). */
  args?: string[];
  /** Optional timeout override (seconds) for this model entry. */
  timeoutSeconds?: number;
};

/** Settings for the `tools.links` section of the configuration. */
export type LinkToolsConfig = {
  /** Enable link understanding when models are configured. */
  enabled?: boolean;
  /** Ordered model list (fallbacks in order). */
  models?: LinkModelConfig[];
  /** Max number of links to process per message. */
  maxLinks?: number;
  /** Default timeout (seconds). */
  timeoutSeconds?: number;
  /** Optional scope gating for understanding. */
  scope?: MediaUnderstandingScopeConfig;
};
// Field validators for a single link-understanding model entry.
const linkModelShape = {
  type: z.literal("cli").optional(),
  command: z.string().min(1),
  args: z.array(z.string()).optional(),
  timeoutSeconds: z.number().int().positive().optional(),
};

/** Strict object schema for one `tools.links.models[]` entry. */
export const LinkModelSchema = z.object(linkModelShape).strict();

// Field validators for the `tools.links` section.
const toolsLinksShape = {
  enabled: z.boolean().optional(),
  scope: MediaUnderstandingScopeSchema,
  maxLinks: z.number().int().positive().optional(),
  timeoutSeconds: z.number().int().positive().optional(),
  models: z.array(LinkModelSchema).optional(),
};

/** Strict, optional object schema for the `tools.links` section. */
export const ToolsLinksSchema = z.object(toolsLinksShape).strict().optional();
/** Result of one link-understanding pass, as returned by `runLinkUnderstanding`. */
export type ApplyLinkUnderstandingResult = {
  outputs: string[];
  urls: string[];
};

/**
 * Runs link understanding for the message in `params.ctx` and, when it
 * produced any output, folds that output back into the context.
 *
 * When `outputs` is non-empty the context is mutated in place:
 * - `ctx.LinkUnderstanding` gets the new outputs appended to any existing ones;
 * - `ctx.Body` is replaced with the value of `formatLinkUnderstandingBody`;
 * - `finalizeInboundContext` is called with both `forceBodyForAgent` and
 *   `forceBodyForCommands` set.
 *
 * When `outputs` is empty the context is left untouched.
 *
 * @param params.ctx Message context to read from and update.
 * @param params.cfg Configuration passed through to `runLinkUnderstanding`.
 * @returns The result of `runLinkUnderstanding`, unchanged.
 */
export async function applyLinkUnderstanding(params: {
  ctx: MsgContext;
  cfg: ClawdbotConfig;
}): Promise<ApplyLinkUnderstandingResult> {
  const { ctx, cfg } = params;
  const result = await runLinkUnderstanding({ cfg, ctx });

  // Nothing was produced: leave Body and the derived fields as they are.
  if (result.outputs.length === 0) {
    return result;
  }

  ctx.LinkUnderstanding = [...(ctx.LinkUnderstanding ?? []), ...result.outputs];
  ctx.Body = formatLinkUnderstandingBody({
    body: ctx.Body,
    outputs: result.outputs,
  });

  // Body changed, so the finalizer is re-run with both force flags set.
  finalizeInboundContext(ctx, {
    forceBodyForAgent: true,
    forceBodyForCommands: true,
  });

  return result;
}
// Markdown links (`[label](https://…)`) are blanked out so only bare URLs are considered.
const MARKDOWN_LINK_RE = /\[[^\]]*]\((https?:\/\/\S+?)\)/gi;
const BARE_LINK_RE = /https?:\/\/\S+/gi;

// Characters that commonly follow a URL pasted into prose and are not part of it.
const TRAILING_PUNCTUATION = ".,;:!?'\">";
// Closing bracket -> matching opener; a closer is only stripped when unbalanced.
const CLOSING_BRACKETS = new Map<string, string>([
  [")", "("],
  ["]", "["],
  ["}", "{"],
]);

/** Replaces every markdown link with a space so its target is not picked up as a bare URL. */
function stripMarkdownLinks(message: string): string {
  return message.replace(MARKDOWN_LINK_RE, " ");
}

/** Returns the floor of a positive finite `value`, otherwise `DEFAULT_MAX_LINKS`. */
function resolveMaxLinks(value?: number): number {
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    return Math.floor(value);
  }
  return DEFAULT_MAX_LINKS;
}

/** Counts occurrences of the single character `ch` in `text`. */
function countChar(text: string, ch: string): number {
  return text.split(ch).length - 1;
}

/**
 * Drops sentence punctuation and unbalanced closing brackets from the end of a
 * matched URL, e.g. `https://a.example/x).` becomes `https://a.example/x`.
 * Balanced brackets such as `…/Foo_(bar)` are kept.
 */
function trimTrailingPunctuation(raw: string): string {
  let url = raw;
  while (url.length > 0) {
    const last = url.charAt(url.length - 1);
    if (TRAILING_PUNCTUATION.includes(last)) {
      url = url.slice(0, -1);
      continue;
    }
    const opener = CLOSING_BRACKETS.get(last);
    if (opener !== undefined && countChar(url, last) > countChar(url, opener)) {
      url = url.slice(0, -1);
      continue;
    }
    break;
  }
  return url;
}

/**
 * Reports whether a hostname (as returned by `URL#hostname`) refers to the
 * local machine. The URL parser has already lower-cased the host and
 * normalized numeric IPv4 spellings (`127.1`, `2130706433`) to dotted form.
 */
function isLoopbackHost(hostname: string): boolean {
  // Tolerate a fully-qualified trailing dot ("localhost.").
  const host = hostname.toLowerCase().replace(/\.$/, "");
  if (host === "localhost" || host.endsWith(".localhost")) return true;
  // Whole 127.0.0.0/8 block plus the unspecified address.
  if (host === "0.0.0.0" || /^127(?:\.\d{1,3}){3}$/.test(host)) return true;
  // IPv6 loopback / unspecified; URL#hostname keeps the brackets.
  if (host === "[::1]" || host === "[::]") return true;
  // IPv4-mapped IPv6 form of 127.0.0.0/8, e.g. [::ffff:7f00:1].
  return /^\[::ffff:7f[0-9a-f]{2}:[0-9a-f]{1,4}]$/.test(host);
}

/** Accepts only parseable http/https URLs whose host is not a loopback address. */
function isAllowedUrl(raw: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    return false;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;
  return !isLoopbackHost(parsed.hostname);
}

/**
 * Extracts bare http/https URLs from a message, in order of first appearance.
 *
 * Markdown link targets are ignored, trailing prose punctuation is removed,
 * duplicates are dropped, loopback hosts are rejected, and at most `maxLinks`
 * URLs are returned.
 *
 * @param message Raw message text; empty or whitespace-only input yields `[]`.
 * @param opts.maxLinks Upper bound on returned links; non-positive or
 *   non-finite values fall back to `DEFAULT_MAX_LINKS`.
 * @returns The accepted URLs.
 */
export function extractLinksFromMessage(message: string, opts?: { maxLinks?: number }): string[] {
  const source = message?.trim();
  if (!source) return [];

  const maxLinks = resolveMaxLinks(opts?.maxLinks);
  const sanitized = stripMarkdownLinks(source);
  const seen = new Set<string>();
  const results: string[] = [];

  for (const match of sanitized.matchAll(BARE_LINK_RE)) {
    const url = trimTrailingPunctuation(match[0]);
    if (!url || seen.has(url)) continue;
    if (!isAllowedUrl(url)) continue;
    seen.add(url);
    results.push(url);
    if (results.length >= maxLinks) break;
  }

  return results;
}
/**
 * Appends link-understanding outputs to a message body.
 *
 * Each output is trimmed and blank ones are discarded. If nothing remains,
 * the body is returned as given (or `""` when absent). Otherwise the trimmed
 * body and the newline-joined outputs are separated by one blank line; when
 * the trimmed body is empty only the joined outputs are returned.
 *
 * @param params.body Existing message body, if any.
 * @param params.outputs Raw outputs to append.
 * @returns The combined body text.
 */
export function formatLinkUnderstandingBody(params: { body?: string; outputs: string[] }): string {
  const cleaned: string[] = [];
  for (const entry of params.outputs) {
    const text = entry.trim();
    if (text) cleaned.push(text);
  }

  const original = params.body ?? "";
  if (cleaned.length === 0) return original;

  const appended = cleaned.join("\n");
  const trimmedBody = original.trim();
  return trimmedBody ? `${trimmedBody}\n\n${appended}` : appended;
}
/**
 * Asks the shared media-understanding scope resolver whether link
 * understanding may run for this message, using the `scope` from the link
 * config and the session key, channel and chat type from the context.
 */
function resolveScopeDecision(params: {
  config?: LinkToolsConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  return resolveMediaUnderstandingScope({
    scope: params.config?.scope,
    sessionKey: params.ctx.SessionKey,
    channel: params.ctx.Surface ?? params.ctx.Provider,
    chatType: normalizeMediaUnderstandingChatType(params.ctx.ChatType),
  });
}

/**
 * Picks the timeout for one model entry: the entry's own `timeoutSeconds`
 * wins over the section-level value, and `DEFAULT_LINK_TIMEOUT_SECONDS` is
 * handed to `resolveTimeoutMs` as the fallback.
 */
function resolveTimeoutMsFromConfig(params: {
  config?: LinkToolsConfig;
  entry: LinkModelConfig;
}): number {
  const configured = params.entry.timeoutSeconds ?? params.config?.timeoutSeconds;
  return resolveTimeoutMs(configured, DEFAULT_LINK_TIMEOUT_SECONDS);
}

/**
 * Runs a single CLI model entry against one URL.
 *
 * Each argument is passed through `applyTemplate` with the message context
 * extended by `LinkUrl`; the command itself is never templated.
 *
 * @returns Trimmed stdout, or `null` when the entry is not a CLI entry, has a
 *   blank command, or printed nothing. Errors from `runExec` propagate.
 */
async function runCliEntry(params: {
  entry: LinkModelConfig;
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  const { entry, ctx, url, config } = params;
  if ((entry.type ?? "cli") !== "cli") return null;
  const command = entry.command.trim();
  if (!command) return null;

  const timeoutMs = resolveTimeoutMsFromConfig({ config, entry });
  const templCtx = {
    ...ctx,
    LinkUrl: url,
  };
  const renderedArgs = (entry.args ?? []).map((part) => applyTemplate(part, templCtx));

  if (shouldLogVerbose()) {
    logVerbose(`Link understanding via CLI: ${[command, ...renderedArgs].join(" ")}`);
  }

  const { stdout } = await runExec(command, renderedArgs, {
    timeoutMs,
    maxBuffer: CLI_OUTPUT_MAX_BUFFER,
  });
  const trimmed = stdout.trim();
  return trimmed || null;
}

/**
 * Tries the configured entries in order for one URL and returns the first
 * non-empty output. A failing entry is logged (verbose only) and the next
 * entry is tried.
 *
 * @returns The first output, or `null` when no entry produced one.
 */
async function runLinkEntries(params: {
  entries: LinkModelConfig[];
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  let lastError: unknown;
  for (const entry of params.entries) {
    try {
      const output = await runCliEntry({
        entry,
        ctx: params.ctx,
        url: params.url,
        config: params.config,
      });
      if (output) return output;
    } catch (err) {
      // Best effort: remember the failure and fall through to the next entry.
      lastError = err;
      if (shouldLogVerbose()) {
        logVerbose(`Link understanding failed for ${params.url}: ${String(err)}`);
      }
    }
  }
  if (lastError && shouldLogVerbose()) {
    logVerbose(`Link understanding exhausted for ${params.url}`);
  }
  return null;
}

/**
 * Extracts links from the incoming message and runs the configured model
 * entries on each one, sequentially and in message order.
 *
 * Returns empty `urls` and `outputs` when `tools.links` is absent or has
 * `enabled: false`, when the scope policy denies the message, or when the
 * message contains no accepted links. When links are found but no model
 * entries are configured, `urls` is populated and `outputs` is empty.
 *
 * @param params.cfg Full configuration; only `tools.links` is read here.
 * @param params.ctx Message context used for scope, templating and text.
 * @param params.message Optional text override; otherwise `CommandBody`,
 *   `RawBody` and `Body` are consulted in that order.
 * @returns The detected URLs and the outputs obtained for them.
 */
export async function runLinkUnderstanding(params: {
  cfg: ClawdbotConfig;
  ctx: MsgContext;
  message?: string;
}): Promise<LinkUnderstandingResult> {
  const config = params.cfg.tools?.links;
  if (!config || config.enabled === false) return { urls: [], outputs: [] };

  const scopeDecision = resolveScopeDecision({ config, ctx: params.ctx });
  if (scopeDecision === "deny") {
    if (shouldLogVerbose()) {
      logVerbose("Link understanding disabled by scope policy.");
    }
    return { urls: [], outputs: [] };
  }

  const message = params.message ?? params.ctx.CommandBody ?? params.ctx.RawBody ?? params.ctx.Body;
  const links = extractLinksFromMessage(message ?? "", { maxLinks: config.maxLinks });
  if (links.length === 0) return { urls: [], outputs: [] };

  const entries = config.models ?? [];
  if (entries.length === 0) return { urls: links, outputs: [] };

  const outputs: string[] = [];
  for (const url of links) {
    const output = await runLinkEntries({
      entries,
      ctx: params.ctx,
      url,
      config,
    });
    if (output) outputs.push(output);
  }

  return { urls: links, outputs };
}