Add link understanding tool support (#1637)
* Add * Fix --------- Co-authored-by: Richard <dasilva333@DESKTOP-74E3GJO.localdomain>
This commit is contained in:
37
src/link-understanding/apply.ts
Normal file
37
src/link-understanding/apply.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import { finalizeInboundContext } from "../auto-reply/reply/inbound-context.js";
|
||||
import { formatLinkUnderstandingBody } from "./format.js";
|
||||
import { runLinkUnderstanding } from "./runner.js";
|
||||
|
||||
/**
 * Value returned by `applyLinkUnderstanding`.
 *
 * It is the unmodified result of the underlying link-understanding run.
 */
export type ApplyLinkUnderstandingResult = {
  /** URLs reported by the link-understanding run. */
  urls: string[];
  /** Output strings that were appended to the context's `LinkUnderstanding` list and body. */
  outputs: string[];
};
|
||||
|
||||
/**
 * Runs link understanding for an inbound message and folds any outputs into
 * the message context.
 *
 * When at least one output is produced, `params.ctx` is mutated in place:
 * the outputs are appended to `ctx.LinkUnderstanding`, `ctx.Body` is rebuilt
 * with the outputs appended, and the inbound context is re-finalized with the
 * agent and command bodies forced. When nothing is produced the context is
 * left untouched.
 *
 * @param params.ctx - Message context to read from and update.
 * @param params.cfg - Configuration passed through to the runner.
 * @returns The runner's result, unchanged.
 */
export async function applyLinkUnderstanding(params: {
  ctx: MsgContext;
  cfg: ClawdbotConfig;
}): Promise<ApplyLinkUnderstandingResult> {
  const { ctx, cfg } = params;
  const result = await runLinkUnderstanding({ cfg, ctx });
  const { outputs } = result;

  if (outputs.length > 0) {
    // Keep anything recorded earlier and add the new outputs after it.
    const previous = ctx.LinkUnderstanding ?? [];
    ctx.LinkUnderstanding = previous.concat(outputs);
    ctx.Body = formatLinkUnderstandingBody({ body: ctx.Body, outputs });

    finalizeInboundContext(ctx, {
      forceBodyForAgent: true,
      forceBodyForCommands: true,
    });
  }

  return result;
}
|
||||
2
src/link-understanding/defaults.ts
Normal file
2
src/link-understanding/defaults.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Fallback timeout, in seconds, for a single link-understanding command. */
export const DEFAULT_LINK_TIMEOUT_SECONDS = 30;

/** Fallback cap on how many links are extracted from one message. */
export const DEFAULT_MAX_LINKS = 3;
|
||||
27
src/link-understanding/detect.test.ts
Normal file
27
src/link-understanding/detect.test.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { extractLinksFromMessage } from "./detect.js";
|
||||
|
||||
// Behavioral checks for bare-URL extraction: ordering, de-duplication with a
// cap, markdown links being skipped, and the 127.0.0.1 block.
describe("extractLinksFromMessage", () => {
  it("extracts bare http/https URLs in order", () => {
    const message = "see https://a.example and http://b.test";

    expect(extractLinksFromMessage(message)).toEqual(["https://a.example", "http://b.test"]);
  });

  it("dedupes links and enforces maxLinks", () => {
    const message = "https://a.example https://a.example https://b.test";
    const options = { maxLinks: 1 };

    expect(extractLinksFromMessage(message, options)).toEqual(["https://a.example"]);
  });

  it("ignores markdown links", () => {
    const message = "[doc](https://docs.example) https://bare.example";

    expect(extractLinksFromMessage(message)).toEqual(["https://bare.example"]);
  });

  it("blocks 127.0.0.1", () => {
    const message = "http://127.0.0.1/test https://ok.test";

    expect(extractLinksFromMessage(message)).toEqual(["https://ok.test"]);
  });
});
|
||||
49
src/link-understanding/detect.ts
Normal file
49
src/link-understanding/detect.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { DEFAULT_MAX_LINKS } from "./defaults.js";
|
||||
|
||||
// Matches an inline markdown link such as `[label](https://example.com)`.
// These are blanked out so that only bare URLs are considered.
const MARKDOWN_LINK_RE = /\[[^\]]*]\((https?:\/\/\S+?)\)/gi;
// Matches a bare http(s) URL running up to the next whitespace character.
const BARE_LINK_RE = /https?:\/\/\S+/gi;

/**
 * Replaces every inline markdown link in `message` with a single space.
 *
 * @param message - Raw message text.
 * @returns The message with markdown links blanked out; other text is unchanged.
 */
function stripMarkdownLinks(message: string): string {
  const withoutMarkdownLinks = message.replace(MARKDOWN_LINK_RE, " ");
  return withoutMarkdownLinks;
}
|
||||
|
||||
/**
 * Normalizes a configured link cap into a positive integer.
 *
 * @param value - Optional configured maximum number of links.
 * @returns `value` rounded down (but never below 1) when it is a finite
 *   number greater than zero; otherwise `DEFAULT_MAX_LINKS`.
 */
function resolveMaxLinks(value?: number): number {
  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
    return DEFAULT_MAX_LINKS;
  }
  // Fractions in (0, 1) would floor to 0; clamp so the result stays a usable
  // positive cap instead of a contradictory "zero links allowed".
  return Math.max(1, Math.floor(value));
}
|
||||
|
||||
/**
 * Decides whether a candidate URL may be handed to link-understanding tools.
 *
 * Only `http:` and `https:` URLs are accepted, and URLs whose host is a
 * loopback or unspecified address are rejected: `localhost` and
 * `*.localhost`, anything in `127.0.0.0/8`, `0.0.0.0`, `[::1]`, `[::]`, and
 * IPv4-mapped IPv6 loopback addresses.
 *
 * NOTE(review): this is a literal-host check only. Private network ranges and
 * DNS names that resolve to loopback addresses are not detected here.
 *
 * @param raw - Candidate URL text.
 * @returns `true` when the URL parses and is not blocked, otherwise `false`.
 */
function isAllowedUrl(raw: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    // Not a parseable absolute URL.
    return false;
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;

  // Drop a trailing root dot ("localhost.") so it cannot dodge the comparisons.
  const host = parsed.hostname.toLowerCase().replace(/\.$/, "");

  if (host === "localhost" || host.endsWith(".localhost")) return false;
  // The URL parser canonicalizes IPv4 hosts to dotted-decimal, so matching the
  // 127.0.0.0/8 range on the normalized hostname is sufficient.
  if (host === "0.0.0.0" || /^127(?:\.\d{1,3}){3}$/.test(host)) return false;
  if (host === "[::1]" || host === "[::]") return false;
  // IPv4-mapped IPv6 form of 127.0.0.0/8, e.g. [::ffff:7f00:1].
  if (/^\[::ffff:7f[0-9a-f]{2}:[0-9a-f]{1,4}\]$/.test(host)) return false;

  return true;
}
|
||||
|
||||
// Characters that frequently follow a URL in prose and are then not part of it.
const TRAILING_PUNCTUATION = ".,;:!?'\"";
// Closing brackets mapped to their openers. A trailing closer is only treated
// as prose when the URL itself does not contain a matching opener.
const BRACKET_PAIRS = new Map<string, string>([
  [")", "("],
  ["]", "["],
  ["}", "{"],
  [">", "<"],
]);

// Counts how many times the single character `char` occurs in `text`.
function countOccurrences(text: string, char: string): number {
  return text.split(char).length - 1;
}

// Drops sentence punctuation and unbalanced closing brackets from the end of a
// whitespace-delimited URL candidate.
function trimTrailingPunctuation(candidate: string): string {
  let end = candidate.length;
  while (end > 0) {
    const last = candidate.charAt(end - 1);
    if (TRAILING_PUNCTUATION.includes(last)) {
      end -= 1;
      continue;
    }
    const opener = BRACKET_PAIRS.get(last);
    if (opener !== undefined) {
      const current = candidate.slice(0, end);
      if (countOccurrences(current, last) > countOccurrences(current, opener)) {
        end -= 1;
        continue;
      }
    }
    break;
  }
  return candidate.slice(0, end);
}

/**
 * Extracts bare http(s) URLs from a message, in order of first appearance.
 *
 * Markdown links (`[label](url)`) are ignored. Each bare match has trailing
 * prose punctuation and unbalanced closing brackets removed, so that
 * `"see https://a.example, ok"` yields `https://a.example` rather than
 * `https://a.example,`. Candidates rejected by `isAllowedUrl` are skipped and
 * duplicates are dropped.
 *
 * @param message - Message text to scan; blank input yields an empty list.
 * @param opts.maxLinks - Optional cap on the number of URLs returned; invalid
 *   values fall back to the module default.
 * @returns Up to `maxLinks` unique, allowed URLs.
 */
export function extractLinksFromMessage(message: string, opts?: { maxLinks?: number }): string[] {
  const source = message?.trim();
  if (!source) return [];

  const maxLinks = resolveMaxLinks(opts?.maxLinks);
  const sanitized = stripMarkdownLinks(source);
  const seen = new Set<string>();
  const results: string[] = [];

  for (const match of sanitized.matchAll(BARE_LINK_RE)) {
    // Trim before validating and de-duplicating so "url" and "url," collapse.
    const url = trimTrailingPunctuation(match[0].trim());
    if (!url) continue;
    if (!isAllowedUrl(url)) continue;
    if (seen.has(url)) continue;
    seen.add(url);
    results.push(url);
    if (results.length >= maxLinks) break;
  }

  return results;
}
|
||||
10
src/link-understanding/format.ts
Normal file
10
src/link-understanding/format.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
 * Appends link-understanding outputs to a message body.
 *
 * Outputs are trimmed and blank ones are discarded. If none remain, the
 * original body is returned as-is (or `""` when absent). Otherwise the
 * trimmed body and the newline-joined outputs are separated by a blank line;
 * with an empty body only the joined outputs are returned.
 *
 * @param params.body - Existing message body, if any.
 * @param params.outputs - Output strings to append.
 * @returns The combined body text.
 */
export function formatLinkUnderstandingBody(params: { body?: string; outputs: string[] }): string {
  const originalBody = params.body ?? "";

  const cleaned: string[] = [];
  for (const output of params.outputs) {
    const text = output.trim();
    if (text) cleaned.push(text);
  }

  if (cleaned.length === 0) return originalBody;

  const joined = cleaned.join("\n");
  const base = originalBody.trim();
  return base ? `${base}\n\n${joined}` : joined;
}
|
||||
4
src/link-understanding/index.ts
Normal file
4
src/link-understanding/index.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
export { applyLinkUnderstanding } from "./apply.js";
|
||||
export { extractLinksFromMessage } from "./detect.js";
|
||||
export { formatLinkUnderstandingBody } from "./format.js";
|
||||
export { runLinkUnderstanding } from "./runner.js";
|
||||
136
src/link-understanding/runner.ts
Normal file
136
src/link-understanding/runner.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import { applyTemplate } from "../auto-reply/templating.js";
|
||||
import type { LinkModelConfig, LinkToolsConfig } from "../config/types.tools.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { runExec } from "../process/exec.js";
|
||||
import { CLI_OUTPUT_MAX_BUFFER } from "../media-understanding/defaults.js";
|
||||
import { resolveTimeoutMs } from "../media-understanding/resolve.js";
|
||||
import {
|
||||
normalizeMediaUnderstandingChatType,
|
||||
resolveMediaUnderstandingScope,
|
||||
} from "../media-understanding/scope.js";
|
||||
import { DEFAULT_LINK_TIMEOUT_SECONDS } from "./defaults.js";
|
||||
import { extractLinksFromMessage } from "./detect.js";
|
||||
|
||||
/**
 * Result of a link-understanding run.
 */
export type LinkUnderstandingResult = {
  /** Non-empty outputs produced by the configured entries, in link order. */
  outputs: string[];
  /** Links extracted from the message; empty when the run was skipped or found none. */
  urls: string[];
};
|
||||
|
||||
/**
 * Evaluates the links scope policy for the current message context.
 *
 * Delegates to the media-understanding scope resolver, using the context's
 * session key, its surface (falling back to the provider) as the channel,
 * and its normalized chat type.
 *
 * @param params.config - Links tool configuration, if any.
 * @param params.ctx - Message context being processed.
 * @returns `"allow"` or `"deny"` as decided by the scope resolver.
 */
function resolveScopeDecision(params: {
  config?: LinkToolsConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  const { config, ctx } = params;
  const scope = config?.scope;
  const sessionKey = ctx.SessionKey;
  const channel = ctx.Surface ?? ctx.Provider;
  const chatType = normalizeMediaUnderstandingChatType(ctx.ChatType);

  return resolveMediaUnderstandingScope({ scope, sessionKey, channel, chatType });
}
|
||||
|
||||
/**
 * Resolves the timeout for one entry.
 *
 * The entry's own `timeoutSeconds` wins over the links-level setting; the
 * chosen value and `DEFAULT_LINK_TIMEOUT_SECONDS` are then passed to
 * `resolveTimeoutMs`.
 *
 * @param params.config - Links tool configuration, if any.
 * @param params.entry - Entry whose timeout is being resolved.
 * @returns The value produced by `resolveTimeoutMs`.
 */
function resolveTimeoutMsFromConfig(params: {
  config?: LinkToolsConfig;
  entry: LinkModelConfig;
}): number {
  let seconds = params.entry.timeoutSeconds;
  if (seconds == null) {
    seconds = params.config?.timeoutSeconds;
  }
  return resolveTimeoutMs(seconds, DEFAULT_LINK_TIMEOUT_SECONDS);
}
|
||||
|
||||
/**
 * Runs one CLI entry against a URL and returns its trimmed standard output.
 *
 * Entries whose type is not `"cli"` (a missing type counts as `"cli"`) and
 * entries with a blank command are skipped. Each argument is passed through
 * `applyTemplate` with the message context plus `LinkUrl`; the command itself
 * is not templated.
 *
 * @param params.entry - Entry describing the command and arguments.
 * @param params.ctx - Message context exposed to argument templates.
 * @param params.url - Link being processed, exposed to templates as `LinkUrl`.
 * @param params.config - Links tool configuration used for the timeout.
 * @returns The trimmed stdout, or `null` when skipped or when stdout is blank.
 *   Errors from `runExec` are not caught here.
 */
async function runCliEntry(params: {
  entry: LinkModelConfig;
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  const { entry, ctx, url, config } = params;

  const entryType = entry.type ?? "cli";
  if (entryType !== "cli") return null;

  const executable = entry.command.trim();
  if (executable.length === 0) return null;

  const timeoutMs = resolveTimeoutMsFromConfig({ config, entry });
  const templateContext = { ...ctx, LinkUrl: url };
  const renderedArgs = (entry.args ?? []).map((arg) => applyTemplate(arg, templateContext));

  if (shouldLogVerbose()) {
    const commandLine = [executable, ...renderedArgs].join(" ");
    logVerbose(`Link understanding via CLI: ${commandLine}`);
  }

  const execResult = await runExec(executable, renderedArgs, {
    timeoutMs,
    maxBuffer: CLI_OUTPUT_MAX_BUFFER,
  });
  const output = execResult.stdout.trim();
  return output.length > 0 ? output : null;
}
|
||||
|
||||
/**
 * Tries each entry in order for a URL and returns the first non-empty output.
 *
 * A failing entry is logged (in verbose mode) and the next entry is tried.
 * If no entry yields output and at least one entry failed, an "exhausted"
 * message is logged in verbose mode.
 *
 * @param params.entries - Entries to try, in priority order.
 * @param params.ctx - Message context passed to each entry.
 * @param params.url - Link being processed.
 * @param params.config - Links tool configuration passed to each entry.
 * @returns The first non-empty output, or `null` when no entry produced one.
 */
async function runLinkEntries(params: {
  entries: LinkModelConfig[];
  ctx: MsgContext;
  url: string;
  config?: LinkToolsConfig;
}): Promise<string | null> {
  const { entries, ctx, url, config } = params;
  let lastFailure: unknown;

  for (const entry of entries) {
    let output: string | null;
    try {
      output = await runCliEntry({ entry, ctx, url, config });
    } catch (failure) {
      lastFailure = failure;
      if (shouldLogVerbose()) {
        logVerbose(`Link understanding failed for ${url}: ${String(failure)}`);
      }
      continue;
    }
    if (output) return output;
  }

  if (lastFailure && shouldLogVerbose()) {
    logVerbose(`Link understanding exhausted for ${url}`);
  }
  return null;
}
|
||||
|
||||
/**
 * Extracts links from a message and runs the configured entries on each one.
 *
 * Returns an empty result when the links tool is not configured, is
 * explicitly disabled, is denied by the scope policy, or when the message
 * contains no extractable links. The text scanned is the first defined of
 * `params.message`, `ctx.CommandBody`, `ctx.RawBody` and `ctx.Body`. Links
 * are processed one after another, in order.
 *
 * @param params.cfg - Configuration; `cfg.tools.links` controls this feature.
 * @param params.ctx - Message context for the inbound message.
 * @param params.message - Optional explicit text to scan instead of the context.
 * @returns The extracted URLs and the non-empty outputs produced for them.
 */
export async function runLinkUnderstanding(params: {
  cfg: ClawdbotConfig;
  ctx: MsgContext;
  message?: string;
}): Promise<LinkUnderstandingResult> {
  const emptyResult = (): LinkUnderstandingResult => ({ urls: [], outputs: [] });
  const { cfg, ctx } = params;

  const config = cfg.tools?.links;
  if (!config || config.enabled === false) return emptyResult();

  if (resolveScopeDecision({ config, ctx }) === "deny") {
    if (shouldLogVerbose()) {
      logVerbose("Link understanding disabled by scope policy.");
    }
    return emptyResult();
  }

  const text = params.message ?? ctx.CommandBody ?? ctx.RawBody ?? ctx.Body ?? "";
  const urls = extractLinksFromMessage(text, { maxLinks: config.maxLinks });
  if (urls.length === 0) return emptyResult();

  const entries = config.models ?? [];
  const outputs: string[] = [];

  // With no entries configured the URLs are still reported, just without outputs.
  if (entries.length > 0) {
    for (const url of urls) {
      const output = await runLinkEntries({ entries, ctx, url, config });
      if (output) outputs.push(output);
    }
  }

  return { urls, outputs };
}
|
||||
Reference in New Issue
Block a user