Add link understanding tool support (#1637)

* Add

* Fix

---------

Co-authored-by: Richard <dasilva333@DESKTOP-74E3GJO.localdomain>
This commit is contained in:
Richard Pinedo
2026-01-24 19:15:54 -05:00
committed by GitHub
parent 2f58d59f22
commit 426168a338
13 changed files with 323 additions and 0 deletions

View File

@@ -12,6 +12,7 @@ import { resolveCommandAuthorization } from "../command-auth.js";
import type { MsgContext } from "../templating.js";
import { SILENT_REPLY_TOKEN } from "../tokens.js";
import { applyMediaUnderstanding } from "../../media-understanding/apply.js";
import { applyLinkUnderstanding } from "../../link-understanding/apply.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { resolveDefaultModel } from "./directive-handling.js";
import { resolveReplyDirectives } from "./get-reply-directives.js";
@@ -89,6 +90,10 @@ export async function getReplyFromConfig(
agentDir,
activeModel: { provider, model },
});
await applyLinkUnderstanding({
ctx: finalized,
cfg,
});
}
const commandAuthorized = finalized.CommandAuthorized;

View File

@@ -71,6 +71,7 @@ export type MsgContext = {
Transcript?: string;
MediaUnderstanding?: MediaUnderstandingOutput[];
MediaUnderstandingDecisions?: MediaUnderstandingDecision[];
LinkUnderstanding?: string[];
Prompt?: string;
MaxChars?: number;
ChatType?: string;

View File

@@ -158,6 +158,11 @@ const FIELD_LABELS: Record<string, string> = {
"tools.media.video.attachments": "Video Understanding Attachment Policy",
"tools.media.video.models": "Video Understanding Models",
"tools.media.video.scope": "Video Understanding Scope",
"tools.links.enabled": "Enable Link Understanding",
"tools.links.maxLinks": "Link Understanding Max Links",
"tools.links.timeoutSeconds": "Link Understanding Timeout (sec)",
"tools.links.models": "Link Understanding Models",
"tools.links.scope": "Link Understanding Scope",
"tools.profile": "Tool Profile",
"agents.list[].tools.profile": "Agent Tool Profile",
"tools.byProvider": "Tool Policy by Provider",

View File

@@ -102,6 +102,30 @@ export type MediaUnderstandingConfig = {
models?: MediaUnderstandingModelConfig[];
};
/**
 * One entry of the link-understanding `models` list: describes a CLI
 * invocation (binary, args, optional timeout) used to process a link.
 */
export type LinkModelConfig = {
/** Use a CLI command for link processing. */
type?: "cli";
/** CLI binary (required when type=cli). */
command: string;
/** CLI args (template-enabled). */
args?: string[];
/** Optional timeout override (seconds) for this model entry. */
timeoutSeconds?: number;
};
/**
 * Settings for the link understanding tool, exposed on the tools config
 * under the `links` key. Every field is optional.
 */
export type LinkToolsConfig = {
/** Enable link understanding when models are configured. */
enabled?: boolean;
/** Optional scope gating for understanding. */
scope?: MediaUnderstandingScopeConfig;
/** Max number of links to process per message. */
maxLinks?: number;
/** Default timeout (seconds). */
timeoutSeconds?: number;
/** Ordered model list (fallbacks in order). */
models?: LinkModelConfig[];
};
export type MediaToolsConfig = {
/** Shared model list applied across image/audio/video. */
models?: MediaUnderstandingModelConfig[];
@@ -347,6 +371,7 @@ export type ToolsConfig = {
};
};
media?: MediaToolsConfig;
links?: LinkToolsConfig;
/** Message tool configuration. */
message?: {
/**

View File

@@ -5,6 +5,7 @@ import {
GroupChatSchema,
HumanDelaySchema,
IdentitySchema,
ToolsLinksSchema,
ToolsMediaSchema,
} from "./zod-schema.core.js";
@@ -428,6 +429,7 @@ export const ToolsSchema = z
byProvider: z.record(z.string(), ToolPolicyWithProfileSchema).optional(),
web: ToolsWebSchema,
media: ToolsMediaSchema,
links: ToolsLinksSchema,
message: z
.object({
allowCrossContextSend: z.boolean().optional(),

View File

@@ -454,6 +454,26 @@ export const ToolsMediaSchema = z
.strict()
.optional();
/**
 * Validation schema for a single link-understanding model entry.
 * `command` must be a non-empty string and `timeoutSeconds`, when given,
 * must be a positive integer. The object is declared `.strict()`.
 */
export const LinkModelSchema = z
.object({
type: z.literal("cli").optional(),
command: z.string().min(1),
args: z.array(z.string()).optional(),
timeoutSeconds: z.number().int().positive().optional(),
})
.strict();
/**
 * Validation schema for the `tools.links` config section. The whole section
 * is optional; `maxLinks` and `timeoutSeconds` must be positive integers when
 * present, and `models` is a list of `LinkModelSchema` entries.
 */
export const ToolsLinksSchema = z
.object({
enabled: z.boolean().optional(),
scope: MediaUnderstandingScopeSchema,
maxLinks: z.number().int().positive().optional(),
timeoutSeconds: z.number().int().positive().optional(),
models: z.array(LinkModelSchema).optional(),
})
.strict()
.optional();
/** Accepts either a boolean or the literal string "auto". */
export const NativeCommandsSettingSchema = z.union([z.boolean(), z.literal("auto")]);
export const ProviderCommandsSchema = z

View File

@@ -0,0 +1,37 @@
import type { ClawdbotConfig } from "../config/config.js";
import type { MsgContext } from "../auto-reply/templating.js";
import { finalizeInboundContext } from "../auto-reply/reply/inbound-context.js";
import { formatLinkUnderstandingBody } from "./format.js";
import { runLinkUnderstanding } from "./runner.js";
/** Result returned by `applyLinkUnderstanding` (passed through from `runLinkUnderstanding`). */
export type ApplyLinkUnderstandingResult = {
/** Text outputs produced for the processed links. */
outputs: string[];
/** Links that were extracted from the message. */
urls: string[];
};
/**
 * Runs link understanding for the given context and, when at least one output
 * is produced, merges the outputs into the context.
 *
 * When outputs exist, `params.ctx` is mutated in place:
 * - the outputs are appended to `ctx.LinkUnderstanding`,
 * - `ctx.Body` is replaced with the body plus the formatted outputs,
 * - the context is re-finalized with the agent and command bodies forced.
 *
 * When there are no outputs the context is left untouched.
 *
 * @param params.ctx Inbound message context (mutated when outputs exist).
 * @param params.cfg Application configuration.
 * @returns The result of `runLinkUnderstanding`, unchanged.
 */
export async function applyLinkUnderstanding(params: {
  ctx: MsgContext;
  cfg: ClawdbotConfig;
}): Promise<ApplyLinkUnderstandingResult> {
  const { ctx, cfg } = params;
  const understanding = await runLinkUnderstanding({ cfg, ctx });
  const { outputs } = understanding;

  if (outputs.length > 0) {
    const previous = ctx.LinkUnderstanding ?? [];
    ctx.LinkUnderstanding = previous.concat(outputs);
    ctx.Body = formatLinkUnderstandingBody({ body: ctx.Body, outputs });
    // Re-finalize so the derived agent/command bodies pick up the new Body.
    finalizeInboundContext(ctx, {
      forceBodyForAgent: true,
      forceBodyForCommands: true,
    });
  }

  return understanding;
}

View File

@@ -0,0 +1,2 @@
/** Fallback timeout, in seconds, for a link-understanding run when none is configured. */
export const DEFAULT_LINK_TIMEOUT_SECONDS = 30;
/** Fallback cap on the number of links extracted from one message. */
export const DEFAULT_MAX_LINKS = 3;

View File

@@ -0,0 +1,27 @@
import { describe, expect, it } from "vitest";
import { extractLinksFromMessage } from "./detect.js";
// Covers link extraction: ordering of bare URLs, dedupe plus the maxLinks cap,
// exclusion of markdown-style links, and rejection of the 127.0.0.1 host.
describe("extractLinksFromMessage", () => {
it("extracts bare http/https URLs in order", () => {
const links = extractLinksFromMessage("see https://a.example and http://b.test");
expect(links).toEqual(["https://a.example", "http://b.test"]);
});
it("dedupes links and enforces maxLinks", () => {
const links = extractLinksFromMessage("https://a.example https://a.example https://b.test", {
maxLinks: 1,
});
expect(links).toEqual(["https://a.example"]);
});
it("ignores markdown links", () => {
// The markdown-wrapped URL is dropped; only the bare one is returned.
const links = extractLinksFromMessage("[doc](https://docs.example) https://bare.example");
expect(links).toEqual(["https://bare.example"]);
});
it("blocks 127.0.0.1", () => {
const links = extractLinksFromMessage("http://127.0.0.1/test https://ok.test");
expect(links).toEqual(["https://ok.test"]);
});
});

View File

@@ -0,0 +1,49 @@
import { DEFAULT_MAX_LINKS } from "./defaults.js";
// Markdown links (`[label](http…)`) are blanked out before scanning so that
// only bare URLs are considered.
const MARKDOWN_LINK_RE = /\[[^\]]*]\((https?:\/\/\S+?)\)/gi;
// A bare link is `http://` or `https://` followed by a run of non-whitespace.
const BARE_LINK_RE = /https?:\/\/\S+/gi;

/**
 * Replaces every markdown-style link in `message` with a single space.
 *
 * @param message Text to sanitize.
 * @returns The text with each `[label](url)` occurrence swapped for " ".
 */
function stripMarkdownLinks(message: string): string {
  const placeholder = " ";
  return message.replace(MARKDOWN_LINK_RE, () => placeholder);
}
/**
 * Normalizes the configured link cap.
 *
 * @param value Requested maximum; may be missing or invalid.
 * @returns `value` rounded down when it is a finite number greater than zero,
 *   otherwise `DEFAULT_MAX_LINKS`.
 */
function resolveMaxLinks(value?: number): number {
  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
    return DEFAULT_MAX_LINKS;
  }
  return Math.floor(value);
}
/**
 * Decides whether a candidate link may be passed on for link understanding.
 *
 * A link is rejected when it does not parse as a URL, when its scheme is not
 * `http:`/`https:`, or when its host is a loopback / unspecified address.
 * The loopback guard covers the whole family rather than only the literal
 * `127.0.0.1`: `localhost` (and `*.localhost`), any `127.x.y.z` address,
 * `0.0.0.0`, `[::1]`, `[::]`, and IPv4-mapped IPv6 loopback addresses.
 *
 * @param raw Candidate URL text.
 * @returns `true` when the link is acceptable, `false` otherwise.
 */
function isAllowedUrl(raw: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    // Not parseable as a URL at all.
    return false;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;

  // Compare case-insensitively and ignore a trailing root dot ("localhost.").
  const host = parsed.hostname.toLowerCase().replace(/\.$/, "");
  if (host === "localhost" || host.endsWith(".localhost")) return false;
  // IPv4 loopback range and the unspecified address.
  if (host === "0.0.0.0" || /^127(?:\.\d{1,3}){3}$/.test(host)) return false;
  // IPv6 loopback / unspecified (URL hostnames keep the square brackets).
  if (host === "[::1]" || host === "[::]") return false;
  // IPv4-mapped IPv6 form of 127.x.y.z, e.g. [::ffff:7f00:1].
  if (/^\[::ffff:7f[0-9a-f]{2}:[0-9a-f]{1,4}\]$/.test(host)) return false;
  return true;
}
// Characters that commonly follow a URL in running text but are not part of it.
const TRAILING_LINK_PUNCTUATION = ".,;:!?'\"";

/**
 * Drops sentence punctuation that the bare-link pattern swallowed at the end
 * of a match. A trailing ")" is removed only when it is unmatched, so links
 * such as `https://en.wikipedia.org/wiki/Foo_(bar)` stay intact.
 */
function trimTrailingLinkPunctuation(candidate: string): string {
  let end = candidate.length;
  while (end > 0) {
    const last = candidate.charAt(end - 1);
    if (last === ")") {
      const kept = candidate.slice(0, end);
      const opens = kept.split("(").length - 1;
      const closes = kept.split(")").length - 1;
      // Balanced parentheses belong to the URL itself.
      if (closes <= opens) break;
    } else if (!TRAILING_LINK_PUNCTUATION.includes(last)) {
      break;
    }
    end -= 1;
  }
  return candidate.slice(0, end);
}

/**
 * Extracts the bare http/https links from a message, in order of appearance.
 *
 * Markdown-style links are removed first, so only bare URLs are returned.
 * Each match has trailing sentence punctuation and unmatched closing
 * parentheses trimmed (so "see https://a.example." yields
 * "https://a.example"), is checked with `isAllowedUrl`, and is de-duplicated.
 * Scanning stops once the link cap is reached.
 *
 * @param message Message text; empty or whitespace-only input yields `[]`.
 * @param opts.maxLinks Optional cap on the number of links returned; invalid
 *   or missing values fall back to the default cap.
 * @returns The accepted links, unique and in first-seen order.
 */
export function extractLinksFromMessage(message: string, opts?: { maxLinks?: number }): string[] {
  const source = message?.trim();
  if (!source) return [];
  const maxLinks = resolveMaxLinks(opts?.maxLinks);
  const sanitized = stripMarkdownLinks(source);
  const seen = new Set<string>();
  const results: string[] = [];
  for (const match of sanitized.matchAll(BARE_LINK_RE)) {
    const raw = trimTrailingLinkPunctuation(match[0] ?? "");
    if (!raw) continue;
    if (!isAllowedUrl(raw)) continue;
    if (seen.has(raw)) continue;
    seen.add(raw);
    results.push(raw);
    if (results.length >= maxLinks) break;
  }
  return results;
}

View File

@@ -0,0 +1,10 @@
/**
 * Appends link-understanding outputs to a message body.
 *
 * Outputs are trimmed and blank ones are discarded. If nothing remains, the
 * body is returned as given (or "" when absent). Otherwise the trimmed body
 * and the outputs (one per line) are joined with a blank line between them;
 * when the trimmed body is empty only the outputs are returned.
 *
 * @param params.body Existing message body, if any.
 * @param params.outputs Raw outputs to append.
 * @returns The combined body text.
 */
export function formatLinkUnderstandingBody(params: { body?: string; outputs: string[] }): string {
  const original = params.body ?? "";

  const cleaned: string[] = [];
  for (const entry of params.outputs) {
    const text = entry.trim();
    if (text.length > 0) cleaned.push(text);
  }
  if (cleaned.length === 0) return original;

  const appended = cleaned.join("\n");
  const head = original.trim();
  return head.length > 0 ? `${head}\n\n${appended}` : appended;
}

View File

@@ -0,0 +1,4 @@
export { applyLinkUnderstanding } from "./apply.js";
export { extractLinksFromMessage } from "./detect.js";
export { formatLinkUnderstandingBody } from "./format.js";
export { runLinkUnderstanding } from "./runner.js";

View File

@@ -0,0 +1,136 @@
import type { ClawdbotConfig } from "../config/config.js";
import type { MsgContext } from "../auto-reply/templating.js";
import { applyTemplate } from "../auto-reply/templating.js";
import type { LinkModelConfig, LinkToolsConfig } from "../config/types.tools.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import { runExec } from "../process/exec.js";
import { CLI_OUTPUT_MAX_BUFFER } from "../media-understanding/defaults.js";
import { resolveTimeoutMs } from "../media-understanding/resolve.js";
import {
normalizeMediaUnderstandingChatType,
resolveMediaUnderstandingScope,
} from "../media-understanding/scope.js";
import { DEFAULT_LINK_TIMEOUT_SECONDS } from "./defaults.js";
import { extractLinksFromMessage } from "./detect.js";
/** Outcome of a link-understanding run. */
export type LinkUnderstandingResult = {
/** Links extracted from the message (empty when the run was skipped). */
urls: string[];
/** Non-empty outputs collected for the links, in link order. */
outputs: string[];
};
/**
 * Evaluates the configured scope policy for the current message.
 *
 * Delegates to the media-understanding scope resolver, passing the session
 * key, the channel (`Surface`, falling back to `Provider`), and the
 * normalized chat type.
 *
 * @returns "allow" or "deny" as decided by the scope resolver.
 */
function resolveScopeDecision(params: {
  config?: LinkToolsConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  const { config, ctx } = params;
  const channel = ctx.Surface ?? ctx.Provider;
  const chatType = normalizeMediaUnderstandingChatType(ctx.ChatType);
  return resolveMediaUnderstandingScope({
    scope: config?.scope,
    sessionKey: ctx.SessionKey,
    channel,
    chatType,
  });
}
/**
 * Picks the timeout for one model entry.
 *
 * The entry's own `timeoutSeconds` wins over the tool-level value; the chosen
 * value (possibly undefined) is handed to `resolveTimeoutMs` together with
 * the default link timeout.
 */
function resolveTimeoutMsFromConfig(params: {
  config?: LinkToolsConfig;
  entry: LinkModelConfig;
}): number {
  const { entry, config } = params;
  const seconds = entry.timeoutSeconds ?? config?.timeoutSeconds;
  return resolveTimeoutMs(seconds, DEFAULT_LINK_TIMEOUT_SECONDS);
}
/**
 * Executes one CLI model entry for a single link.
 *
 * Every argument is rendered through `applyTemplate` with the message context
 * extended by `LinkUrl`; the command itself is not templated. The command is
 * run with the resolved timeout and the shared CLI output buffer limit.
 *
 * @returns The trimmed stdout, or `null` when the entry is not a CLI entry,
 *   its command is blank, or the command printed nothing.
 */
async function runCliEntry(params: {
  entry: LinkModelConfig;
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  const { entry, ctx, url, config } = params;

  const kind = entry.type ?? "cli";
  if (kind !== "cli") return null;

  const command = entry.command.trim();
  if (command.length === 0) return null;

  const rawArgs = entry.args ?? [];
  const timeoutMs = resolveTimeoutMsFromConfig({ config, entry });
  const templateContext = { ...ctx, LinkUrl: url };
  const renderedArgs = rawArgs.map((arg) => applyTemplate(arg, templateContext));

  if (shouldLogVerbose()) {
    logVerbose(`Link understanding via CLI: ${[command, ...renderedArgs].join(" ")}`);
  }

  const execResult = await runExec(command, renderedArgs, {
    timeoutMs,
    maxBuffer: CLI_OUTPUT_MAX_BUFFER,
  });
  const text = execResult.stdout.trim();
  return text.length > 0 ? text : null;
}
/**
 * Tries the model entries in order for one link and returns the first
 * non-empty output.
 *
 * A failing entry does not abort the run: the error is remembered, logged in
 * verbose mode, and the next entry is tried.
 *
 * @returns The first output produced, or `null` when no entry produced one.
 */
async function runLinkEntries(params: {
  entries: LinkModelConfig[];
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  const { entries, ctx, url, config } = params;
  let failure: unknown;

  for (const candidate of entries) {
    let produced: string | null;
    try {
      produced = await runCliEntry({ entry: candidate, ctx, url, config });
    } catch (err) {
      failure = err;
      if (shouldLogVerbose()) {
        logVerbose(`Link understanding failed for ${url}: ${String(err)}`);
      }
      continue;
    }
    if (produced) return produced;
  }

  // Only report exhaustion when at least one entry actually failed.
  if (failure && shouldLogVerbose()) {
    logVerbose(`Link understanding exhausted for ${url}`);
  }
  return null;
}
/**
 * Extracts links from a message and runs the configured models on each one.
 *
 * The run is skipped (empty result) when `tools.links` is missing, explicitly
 * disabled, denied by the scope policy, or when the message has no links.
 * The message text is `params.message` when given, otherwise the first
 * defined of `ctx.CommandBody`, `ctx.RawBody`, `ctx.Body`. Links are processed
 * one after another; links that produce no output are omitted from `outputs`.
 *
 * @param params.cfg Application configuration.
 * @param params.ctx Inbound message context.
 * @param params.message Optional text overriding the context bodies.
 * @returns The extracted links and the outputs collected for them. When no
 *   models are configured the links are still returned, with no outputs.
 */
export async function runLinkUnderstanding(params: {
  cfg: ClawdbotConfig;
  ctx: MsgContext;
  message?: string;
}): Promise<LinkUnderstandingResult> {
  const { cfg, ctx } = params;
  const skipped = (): LinkUnderstandingResult => ({ urls: [], outputs: [] });

  const config = cfg.tools?.links;
  if (!config || config.enabled === false) return skipped();

  if (resolveScopeDecision({ config, ctx }) === "deny") {
    if (shouldLogVerbose()) {
      logVerbose("Link understanding disabled by scope policy.");
    }
    return skipped();
  }

  const text = params.message ?? ctx.CommandBody ?? ctx.RawBody ?? ctx.Body ?? "";
  const urls = extractLinksFromMessage(text, { maxLinks: config.maxLinks });
  if (urls.length === 0) return skipped();

  const entries = config.models ?? [];
  const outputs: string[] = [];
  if (entries.length > 0) {
    for (const url of urls) {
      const produced = await runLinkEntries({ entries, ctx, url, config });
      if (produced) outputs.push(produced);
    }
  }
  return { urls, outputs };
}