feat(voicewake): route replies to last channel

This commit is contained in:
Peter Steinberger
2025-12-12 16:15:19 +00:00
parent 3f1bcac077
commit a524b9ae9b
15 changed files with 536 additions and 144 deletions

View File

@@ -1,4 +1,5 @@
import crypto from "node:crypto";
import { chunkText } from "../auto-reply/chunk.js";
import { runCommandReply } from "../auto-reply/command-reply.js";
import {
applyTemplate,
@@ -36,6 +37,8 @@ type AgentCommandOpts = {
timeout?: string;
deliver?: boolean;
surface?: string;
provider?: string;
bestEffortDeliver?: boolean;
};
type SessionResolution = {
@@ -369,11 +372,47 @@ export async function agentCommand(
const payloads = result.payloads ?? [];
const deliver = opts.deliver === true;
const targetTo = opts.to ? normalizeE164(opts.to) : allowFrom[0];
if (deliver && !targetTo) {
throw new Error(
"Delivering to WhatsApp requires --to <E.164> or inbound.allowFrom[0]",
);
const bestEffortDeliver = opts.bestEffortDeliver === true;
const provider = (opts.provider ?? "whatsapp").toLowerCase();
const whatsappTarget = opts.to ? normalizeE164(opts.to) : allowFrom[0];
const telegramTarget = opts.to?.trim() || undefined;
const logDeliveryError = (err: unknown) => {
const message = `Delivery failed (${provider}): ${String(err)}`;
runtime.error?.(message);
if (!runtime.error) runtime.log(message);
};
if (deliver) {
if (provider === "whatsapp" && !whatsappTarget) {
const err = new Error(
"Delivering to WhatsApp requires --to <E.164> or inbound.allowFrom[0]",
);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (provider === "telegram" && !telegramTarget) {
const err = new Error("Delivering to Telegram requires --to <chatId>");
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (provider === "webchat") {
const err = new Error(
"Delivering to WebChat is not supported via `clawdis agent`; use WebChat RPC instead.",
);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (
provider !== "whatsapp" &&
provider !== "telegram" &&
provider !== "webchat"
) {
const err = new Error(`Unknown provider: ${provider}`);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
}
if (opts.json) {
@@ -414,22 +453,55 @@ export async function agentCommand(
runtime.log(lines.join("\n"));
}
if (deliver && targetTo) {
const text = payload.text ?? "";
const media = mediaList;
if (!text && media.length === 0) continue;
if (!deliver) continue;
const primaryMedia = media[0];
await deps.sendMessageWhatsApp(targetTo, text, {
verbose: false,
mediaUrl: primaryMedia,
});
const text = payload.text ?? "";
const media = mediaList;
if (!text && media.length === 0) continue;
for (const extra of media.slice(1)) {
await deps.sendMessageWhatsApp(targetTo, "", {
if (provider === "whatsapp" && whatsappTarget) {
try {
const primaryMedia = media[0];
await deps.sendMessageWhatsApp(whatsappTarget, text, {
verbose: false,
mediaUrl: extra,
mediaUrl: primaryMedia,
});
for (const extra of media.slice(1)) {
await deps.sendMessageWhatsApp(whatsappTarget, "", {
verbose: false,
mediaUrl: extra,
});
}
} catch (err) {
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
continue;
}
if (provider === "telegram" && telegramTarget) {
try {
if (media.length === 0) {
for (const chunk of chunkText(text, 4000)) {
await deps.sendMessageTelegram(telegramTarget, chunk, {
verbose: false,
});
}
} else {
let first = true;
for (const url of media) {
const caption = first ? text : "";
first = false;
await deps.sendMessageTelegram(telegramTarget, caption, {
verbose: false,
mediaUrl: url,
});
}
}
} catch (err) {
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
}
}

View File

@@ -1,3 +1,4 @@
import crypto from "node:crypto";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
@@ -20,6 +21,8 @@ export type SessionEntry = {
totalTokens?: number;
model?: string;
contextTokens?: number;
lastChannel?: "whatsapp" | "telegram" | "webchat";
lastTo?: string;
// Optional flag to mirror Mac app UI and future sync states.
syncing?: boolean | string;
};
@@ -66,6 +69,37 @@ export async function saveSessionStore(
);
}
export async function updateLastRoute(params: {
storePath: string;
sessionKey: string;
channel: SessionEntry["lastChannel"];
to?: string;
}) {
const { storePath, sessionKey, channel, to } = params;
const store = loadSessionStore(storePath);
const existing = store[sessionKey];
const now = Date.now();
const next: SessionEntry = {
sessionId: existing?.sessionId ?? crypto.randomUUID(),
updatedAt: Math.max(existing?.updatedAt ?? 0, now),
systemSent: existing?.systemSent,
abortedLastRun: existing?.abortedLastRun,
thinkingLevel: existing?.thinkingLevel,
verboseLevel: existing?.verboseLevel,
inputTokens: existing?.inputTokens,
outputTokens: existing?.outputTokens,
totalTokens: existing?.totalTokens,
model: existing?.model,
contextTokens: existing?.contextTokens,
syncing: existing?.syncing,
lastChannel: channel,
lastTo: to?.trim() ? to.trim() : undefined,
};
store[sessionKey] = next;
await saveSessionStore(storePath, store);
return next;
}
// Decide which session bucket to use (per-sender vs global).
export function deriveSessionKey(scope: SessionScope, ctx: MsgContext) {
if (scope === "global") return "global";

View File

@@ -211,8 +211,10 @@ export const AgentParamsSchema = Type.Object(
message: NonEmptyString,
to: Type.Optional(Type.String()),
sessionId: Type.Optional(Type.String()),
sessionKey: Type.Optional(Type.String()),
thinking: Type.Optional(Type.String()),
deliver: Type.Optional(Type.Boolean()),
channel: Type.Optional(Type.String()),
timeout: Type.Optional(Type.Integer({ minimum: 0 })),
idempotencyKey: NonEmptyString,
},

View File

@@ -2,8 +2,8 @@ import { type AddressInfo, createServer } from "node:net";
import { describe, expect, test, vi } from "vitest";
import { WebSocket } from "ws";
import { emitAgentEvent } from "../infra/agent-events.js";
import { startGatewayServer } from "./server.js";
import { GatewayLockError } from "../infra/gateway-lock.js";
import { startGatewayServer } from "./server.js";
vi.mock("../commands/health.js", () => ({
getHealthSnapshot: vi.fn().mockResolvedValue({ ok: true, stub: true }),
@@ -35,7 +35,10 @@ async function getFreePort(): Promise<number> {
});
}
async function occupyPort(): Promise<{ server: ReturnType<typeof createServer>; port: number }> {
async function occupyPort(): Promise<{
server: ReturnType<typeof createServer>;
port: number;
}> {
return await new Promise((resolve, reject) => {
const server = createServer();
server.once("error", reject);

View File

@@ -1,8 +1,11 @@
import { randomUUID } from "node:crypto";
import fs from "node:fs";
import {
createServer as createHttpServer,
type Server as HttpServer,
} from "node:http";
import os from "node:os";
import path from "node:path";
import { createServer as createHttpServer, type Server as HttpServer } from "node:http";
import chalk from "chalk";
import { type WebSocket, WebSocketServer } from "ws";
import { createDefaultDeps } from "../cli/deps.js";
@@ -850,7 +853,9 @@ export async function startGatewayServer(
break;
}
}
const { storePath, store, entry } = loadSessionEntry(p.sessionKey);
const { cfg, storePath, store, entry } = loadSessionEntry(
p.sessionKey,
);
const now = Date.now();
const sessionId = entry?.sessionId ?? randomUUID();
const sessionEntry: SessionEntry = {
@@ -859,7 +864,15 @@ export async function startGatewayServer(
thinkingLevel: entry?.thinkingLevel,
verboseLevel: entry?.verboseLevel,
systemSent: entry?.systemSent,
lastChannel: entry?.lastChannel,
lastTo: entry?.lastTo,
};
const mainKey =
(cfg.inbound?.reply?.session?.mainKey ?? "main").trim() || "main";
if (p.sessionKey === mainKey) {
sessionEntry.lastChannel = "webchat";
delete sessionEntry.lastTo;
}
if (store) {
store[p.sessionKey] = sessionEntry;
if (storePath) {
@@ -1081,8 +1094,10 @@ export async function startGatewayServer(
message: string;
to?: string;
sessionId?: string;
sessionKey?: string;
thinking?: string;
deliver?: boolean;
channel?: string;
idempotencyKey: string;
timeout?: number;
};
@@ -1095,7 +1110,90 @@ export async function startGatewayServer(
break;
}
const message = params.message.trim();
const runId = params.sessionId || randomUUID();
const requestedSessionKey =
typeof params.sessionKey === "string" && params.sessionKey.trim()
? params.sessionKey.trim()
: undefined;
let resolvedSessionId = params.sessionId?.trim() || undefined;
let sessionEntry: SessionEntry | undefined;
let bestEffortDeliver = false;
if (requestedSessionKey) {
const { cfg, storePath, store, entry } =
loadSessionEntry(requestedSessionKey);
const now = Date.now();
const sessionId = entry?.sessionId ?? randomUUID();
sessionEntry = {
sessionId,
updatedAt: now,
thinkingLevel: entry?.thinkingLevel,
verboseLevel: entry?.verboseLevel,
systemSent: entry?.systemSent,
lastChannel: entry?.lastChannel,
lastTo: entry?.lastTo,
};
if (store) {
store[requestedSessionKey] = sessionEntry;
if (storePath) {
await saveSessionStore(storePath, store);
}
}
resolvedSessionId = sessionId;
const mainKey =
(cfg.inbound?.reply?.session?.mainKey ?? "main").trim() ||
"main";
if (requestedSessionKey === mainKey) {
chatRunSessions.set(sessionId, requestedSessionKey);
bestEffortDeliver = true;
}
}
const runId = resolvedSessionId || randomUUID();
const requestedChannelRaw =
typeof params.channel === "string" ? params.channel.trim() : "";
const requestedChannel = requestedChannelRaw
? requestedChannelRaw.toLowerCase()
: "last";
const lastChannel = sessionEntry?.lastChannel;
const lastTo =
typeof sessionEntry?.lastTo === "string"
? sessionEntry.lastTo.trim()
: "";
const resolvedChannel = (() => {
if (requestedChannel === "last") {
return lastChannel ?? "whatsapp";
}
if (
requestedChannel === "whatsapp" ||
requestedChannel === "telegram" ||
requestedChannel === "webchat"
) {
return requestedChannel;
}
return lastChannel ?? "whatsapp";
})();
const resolvedTo = (() => {
const explicit =
typeof params.to === "string" && params.to.trim()
? params.to.trim()
: undefined;
if (explicit) return explicit;
if (
resolvedChannel === "whatsapp" ||
resolvedChannel === "telegram"
) {
return lastTo || undefined;
}
return undefined;
})();
const deliver =
params.deliver === true && resolvedChannel !== "webchat";
// Acknowledge via event to avoid double res frames
const ackEvent = {
type: "event",
@@ -1114,11 +1212,14 @@ export async function startGatewayServer(
await agentCommand(
{
message,
to: params.to,
sessionId: params.sessionId,
to: resolvedTo,
sessionId: resolvedSessionId,
thinking: params.thinking,
deliver: params.deliver,
deliver,
provider: resolvedChannel,
timeout: params.timeout?.toString(),
bestEffortDeliver,
surface: "VoiceWake",
},
defaultRuntime,
deps,

View File

@@ -10,6 +10,7 @@ import { formatAgentEnvelope } from "../auto-reply/envelope.js";
import { getReplyFromConfig } from "../auto-reply/reply.js";
import type { ReplyPayload } from "../auto-reply/types.js";
import { loadConfig } from "../config/config.js";
import { resolveStorePath, updateLastRoute } from "../config/sessions.js";
import { danger, logVerbose } from "../globals.js";
import { getChildLogger } from "../logging.js";
import { mediaKindFromMime } from "../media/constants.js";
@@ -145,6 +146,18 @@ export function createTelegramBot(opts: TelegramBotOptions) {
MediaUrl: media?.path,
};
if (!isGroup) {
const sessionCfg = cfg.inbound?.reply?.session;
const mainKey = (sessionCfg?.mainKey ?? "main").trim() || "main";
const storePath = resolveStorePath(sessionCfg?.store);
await updateLastRoute({
storePath,
sessionKey: mainKey,
channel: "telegram",
to: String(chatId),
});
}
if (logVerbose()) {
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
logVerbose(

View File

@@ -10,6 +10,7 @@ import {
resolveSessionKey,
resolveStorePath,
saveSessionStore,
updateLastRoute,
} from "../config/sessions.js";
import { danger, isVerbose, logVerbose, success } from "../globals.js";
import { emitHeartbeatEvent } from "../infra/heartbeat-events.js";
@@ -849,6 +850,23 @@ export async function monitorWebProvider(
`\n[${tsDisplay}] ${fromDisplay} -> ${latest.to}: ${combinedBody}`,
);
if (latest.chatType !== "group") {
const sessionCfg = cfg.inbound?.reply?.session;
const mainKey = (sessionCfg?.mainKey ?? "main").trim() || "main";
const storePath = resolveStorePath(sessionCfg?.store);
const to = latest.senderE164
? normalizeE164(latest.senderE164)
: jidToE164(latest.from);
if (to) {
await updateLastRoute({
storePath,
sessionKey: mainKey,
channel: "whatsapp",
to,
});
}
}
const replyResult = await (replyResolver ?? getReplyFromConfig)(
{
Body: combinedBody,