agent: deliver via rpc and voice forward

This commit is contained in:
Peter Steinberger
2025-12-07 06:05:00 +01:00
parent 1d38f5a4d5
commit 67fa82cf14
11 changed files with 105 additions and 45 deletions

View File

@@ -13,17 +13,25 @@ actor AgentRPC {
/// Minimal error type carrying a human-readable failure description.
/// Thrown when the RPC worker's stdin handle is unavailable ("stdin missing").
private struct RpcError: Error {
    let message: String
}
func send(text: String, thinking: String?, session: String) async -> (ok: Bool, text: String?, error: String?) {
func send(
text: String,
thinking: String?,
session: String,
deliver: Bool,
to: String?) async -> (ok: Bool, text: String?, error: String?)
{
guard process?.isRunning == true else {
return (false, nil, "rpc worker not running")
}
do {
let payload: [String: Any] = [
var payload: [String: Any] = [
"type": "send",
"text": text,
"session": session,
"thinking": thinking ?? "default",
"deliver": deliver,
]
if let to { payload["to"] = to }
let data = try JSONSerialization.data(withJSONObject: payload)
guard let stdinHandle else { throw RpcError(message: "stdin missing") }
stdinHandle.write(data)

View File

@@ -24,7 +24,7 @@ let modelCatalogPathKey = "clawdis.modelCatalogPath"
let modelCatalogReloadKey = "clawdis.modelCatalogReload"
let voiceWakeSupported: Bool = ProcessInfo.processInfo.operatingSystemVersion.majorVersion >= 26
let cliHelperSearchPaths = ["/usr/local/bin", "/opt/homebrew/bin"]
let defaultVoiceWakeForwardCommand = "clawdis-mac agent --message \"${text}\" --thinking low"
let defaultVoiceWakeForwardCommand = "clawdis-mac agent --message \"${text}\" --thinking low --session main --deliver"
let defaultVoiceWakeForwardPort = 22
// Allow enough time for remote agent responses (LLM replies often take >10s).
let defaultVoiceWakeForwardTimeout: TimeInterval = 30

View File

@@ -72,14 +72,16 @@ final class ClawdisXPCService: NSObject, ClawdisXPCProtocol {
}
return await ShellRunner.run(command: command, cwd: cwd, env: env, timeout: timeoutSec)
case let .agent(message, thinking, session):
case let .agent(message, thinking, session, deliver, to):
let trimmed = message.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return Response(ok: false, message: "message empty") }
let sessionKey = session ?? "main"
let rpcResult = await AgentRPC.shared.send(
text: trimmed,
thinking: thinking,
session: sessionKey)
session: sessionKey,
deliver: deliver,
to: to)
return rpcResult.ok
? Response(ok: true, message: rpcResult.text ?? "sent")
: Response(ok: false, message: rpcResult.error ?? "failed to send")
@@ -89,12 +91,16 @@ final class ClawdisXPCService: NSObject, ClawdisXPCProtocol {
private static func runAgentCLI(
message: String,
thinking: String?,
session: String) async -> (ok: Bool, text: String?, error: String?)
session: String,
deliver: Bool,
to: String?) async -> (ok: Bool, text: String?, error: String?)
{
let projectRoot = CommandResolver.projectRootPath()
var command = CommandResolver.clawdisCommand(subcommand: "agent")
command += ["--message", message, "--json"]
if !session.isEmpty { command += ["--to", session] }
if let to { command += ["--to", to] }
if deliver { command += ["--deliver"] }
if !session.isEmpty { command += ["--session-id", session] }
if let thinking { command += ["--thinking", thinking] }
let process = Process()

View File

@@ -140,6 +140,8 @@ struct ClawdisCLI {
var message: String?
var thinking: String?
var session: String?
var deliver = false
var to: String?
while !args.isEmpty {
let arg = args.removeFirst()
@@ -147,6 +149,8 @@ struct ClawdisCLI {
case "--message": message = args.popFirst()
case "--thinking": thinking = args.popFirst()
case "--session": session = args.popFirst()
case "--deliver": deliver = true
case "--to": to = args.popFirst()
default:
// Support bare message as last argument
if message == nil {
@@ -156,7 +160,7 @@ struct ClawdisCLI {
}
guard let message else { throw CLIError.help }
return .agent(message: message, thinking: thinking, session: session)
return .agent(message: message, thinking: thinking, session: session, deliver: deliver, to: to)
default:
throw CLIError.help
@@ -178,7 +182,7 @@ struct ClawdisCLI {
clawdis-mac run [--cwd <path>] [--env KEY=VAL] [--timeout <sec>] [--needs-screen-recording] <command ...>
clawdis-mac status
clawdis-mac rpc-status
clawdis-mac agent --message <text> [--thinking <low|default|high>] [--session <key>]
clawdis-mac agent --message <text> [--thinking <low|default|high>] [--session <key>] [--deliver] [--to <E.164>]
clawdis-mac --help
Returns JSON to stdout:

View File

@@ -25,7 +25,7 @@ public enum Request: Sendable {
timeoutSec: Double?,
needsScreenRecording: Bool)
case status
case agent(message: String, thinking: String?, session: String?)
case agent(message: String, thinking: String?, session: String?, deliver: Bool, to: String?)
case rpcStatus
}
@@ -53,7 +53,7 @@ extension Request: Codable {
case caps, interactive
case displayID, windowID, format
case command, cwd, env, timeoutSec, needsScreenRecording
case message, thinking, session
case message, thinking, session, deliver, to
case rpcStatus
}
@@ -98,11 +98,13 @@ extension Request: Codable {
case .status:
try container.encode(Kind.status, forKey: .type)
case let .agent(message, thinking, session):
case let .agent(message, thinking, session, deliver, to):
try container.encode(Kind.agent, forKey: .type)
try container.encode(message, forKey: .message)
try container.encodeIfPresent(thinking, forKey: .thinking)
try container.encodeIfPresent(session, forKey: .session)
try container.encode(deliver, forKey: .deliver)
try container.encodeIfPresent(to, forKey: .to)
case .rpcStatus:
try container.encode(Kind.rpcStatus, forKey: .type)
@@ -145,7 +147,9 @@ extension Request: Codable {
let message = try container.decode(String.self, forKey: .message)
let thinking = try container.decodeIfPresent(String.self, forKey: .thinking)
let session = try container.decodeIfPresent(String.self, forKey: .session)
self = .agent(message: message, thinking: thinking, session: session)
let deliver = try container.decode(Bool.self, forKey: .deliver)
let to = try container.decodeIfPresent(String.self, forKey: .to)
self = .agent(message: message, thinking: thinking, session: session, deliver: deliver, to: to)
case .rpcStatus:
self = .rpcStatus

View File

@@ -11,7 +11,7 @@ import Testing
}
@Test func rejectEmptyMessage() async {
let result = await AgentRPC.shared.send(text: "", thinking: nil, session: "main")
let result = await AgentRPC.shared.send(text: "", thinking: nil, session: "main", deliver: false, to: nil)
#expect(result.ok == false)
}
}

View File

@@ -1,9 +1,9 @@
import chalk from "chalk";
import { Command } from "commander";
import { agentCommand } from "../commands/agent.js";
import { healthCommand } from "../commands/health.js";
import { sendCommand } from "../commands/send.js";
import { sessionsCommand } from "../commands/sessions.js";
import { healthCommand } from "../commands/health.js";
import { statusCommand } from "../commands/status.js";
import { loadConfig } from "../config/config.js";
import { danger, info, setVerbose } from "../globals.js";
@@ -236,6 +236,10 @@ Examples:
if (!line.trim()) return;
try {
const cmd = JSON.parse(line);
if (cmd.type === "status") {
respond({ type: "result", ok: true });
return;
}
if (cmd.type !== "send" || !cmd.text) {
respond({ type: "error", error: "unsupported command" });
return;
@@ -253,12 +257,14 @@ Examples:
to?: string;
sessionId?: string;
thinking?: string;
deliver?: boolean;
json: boolean;
} = {
message: String(cmd.text),
to: cmd.to ? String(cmd.to) : undefined,
sessionId: cmd.session ? String(cmd.session) : undefined,
thinking: cmd.thinking ? String(cmd.thinking) : undefined,
deliver: Boolean(cmd.deliver),
json: true,
};
@@ -572,20 +578,29 @@ Examples:
program
.command("health")
.description("Probe WhatsApp Web health (creds + Baileys connect) and session store")
.description(
"Probe WhatsApp Web health (creds + Baileys connect) and session store",
)
.option("--json", "Output JSON instead of text", false)
.option("--timeout <ms>", "Connection timeout in milliseconds", "10000")
.option("--verbose", "Verbose logging", false)
.action(async (opts) => {
setVerbose(Boolean(opts.verbose));
const timeout = opts.timeout ? Number.parseInt(String(opts.timeout), 10) : undefined;
const timeout = opts.timeout
? Number.parseInt(String(opts.timeout), 10)
: undefined;
if (timeout !== undefined && (Number.isNaN(timeout) || timeout <= 0)) {
defaultRuntime.error("--timeout must be a positive integer (milliseconds)");
defaultRuntime.error(
"--timeout must be a positive integer (milliseconds)",
);
defaultRuntime.exit(1);
return;
}
try {
await healthCommand({ json: Boolean(opts.json), timeoutMs: timeout }, defaultRuntime);
await healthCommand(
{ json: Boolean(opts.json), timeoutMs: timeout },
defaultRuntime,
);
} catch (err) {
defaultRuntime.error(String(err));
defaultRuntime.exit(1);

View File

@@ -23,6 +23,7 @@ import {
} from "../config/sessions.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { normalizeE164 } from "../utils.js";
import { sendViaIpc } from "../web/ipc.js";
type AgentCommandOpts = {
@@ -162,13 +163,13 @@ export async function agentCommand(
if (!opts.to && !opts.sessionId) {
throw new Error("Pass --to <E.164> or --session-id to choose a session");
}
if (opts.deliver && !opts.to) {
throw new Error("Delivering to WhatsApp requires --to <E.164>");
}
const cfg = loadConfig();
const replyCfg = assertCommandConfig(cfg);
const sessionCfg = replyCfg.session;
const allowFrom = (cfg.inbound?.allowFrom ?? [])
.map((val) => normalizeE164(val))
.filter((val) => val.length > 1);
const thinkOverride = normalizeThinkLevel(opts.thinking);
if (opts.thinking && !thinkOverride) {
@@ -340,6 +341,12 @@ export async function agentCommand(
}
const deliver = opts.deliver === true;
const targetTo = opts.to ? normalizeE164(opts.to) : allowFrom[0];
if (deliver && !targetTo) {
throw new Error(
"Delivering to WhatsApp requires --to <E.164> or inbound.allowFrom[0]",
);
}
for (const payload of payloads) {
const lines: string[] = [];
@@ -351,29 +358,29 @@ export async function agentCommand(
}
runtime.log(lines.join("\n"));
if (deliver && opts.to) {
if (deliver && targetTo) {
const text = payload.text ?? "";
const media = mediaList;
// Prefer IPC to reuse the running relay; fall back to direct web send.
let sentViaIpc = false;
const ipcResult = await sendViaIpc(opts.to, text, media[0]);
const ipcResult = await sendViaIpc(targetTo, text, media[0]);
if (ipcResult) {
sentViaIpc = ipcResult.success;
if (ipcResult.success && media.length > 1) {
for (const extra of media.slice(1)) {
await sendViaIpc(opts.to, "", extra);
await sendViaIpc(targetTo, "", extra);
}
}
}
if (!sentViaIpc) {
if (text || media.length === 0) {
await deps.sendMessageWeb(opts.to, text, {
await deps.sendMessageWeb(targetTo, text, {
verbose: false,
mediaUrl: media[0],
});
}
for (const extra of media.slice(1)) {
await deps.sendMessageWeb(opts.to, "", {
await deps.sendMessageWeb(targetTo, "", {
verbose: false,
mediaUrl: extra,
});

View File

@@ -1,4 +1,4 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { healthCommand } from "./health.js";
@@ -23,7 +23,10 @@ const waitForWaConnection = vi.fn();
const webAuthExists = vi.fn();
vi.mock("../web/session.js", () => ({
createWaSocket: vi.fn(async () => ({ ws: { close: vi.fn() }, ev: { on: vi.fn() } })),
createWaSocket: vi.fn(async () => ({
ws: { close: vi.fn() },
ev: { on: vi.fn() },
})),
waitForWaConnection: (...args: unknown[]) => waitForWaConnection(...args),
webAuthExists: (...args: unknown[]) => webAuthExists(...args),
getStatusCode: vi.fn(() => 440),

View File

@@ -2,10 +2,7 @@ import fs from "node:fs";
import path from "node:path";
import { loadConfig } from "../config/config.js";
import {
loadSessionStore,
resolveStorePath,
} from "../config/sessions.js";
import { loadSessionStore, resolveStorePath } from "../config/sessions.js";
import { info } from "../globals.js";
import type { RuntimeEnv } from "../runtime.js";
import { resolveHeartbeatSeconds } from "../web/reconnect.js";
@@ -37,7 +34,11 @@ type HealthSummary = {
sessions: {
path: string;
count: number;
recent: Array<{ key: string; updatedAt: number | null; age: number | null }>;
recent: Array<{
key: string;
updatedAt: number | null;
age: number | null;
}>;
};
ipc: { path: string; exists: boolean };
};
@@ -54,7 +55,12 @@ async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
setTimeout(() => reject(new Error("timeout")), timeoutMs),
),
]);
return { ok: true, status: null, error: null, elapsedMs: Date.now() - started };
return {
ok: true,
status: null,
error: null,
elapsedMs: Date.now() - started,
};
} catch (err) {
return {
ok: false,
@@ -126,18 +132,25 @@ export async function healthCommand(
}
if (connect) {
const base = connect.ok
? info(`Connect: ok (${connect.elapsedMs}ms)`) : `Connect: failed (${connect.status ?? "unknown"})`;
? info(`Connect: ok (${connect.elapsedMs}ms)`)
: `Connect: failed (${connect.status ?? "unknown"})`;
runtime.log(base + (connect.error ? ` - ${connect.error}` : ""));
}
runtime.log(info(`Heartbeat interval: ${heartbeatSeconds}s`));
runtime.log(info(`Session store: ${storePath} (${sessions.length} entries)`));
runtime.log(
info(`Session store: ${storePath} (${sessions.length} entries)`),
);
if (recent.length > 0) {
runtime.log("Recent sessions:");
for (const r of recent) {
runtime.log(`- ${r.key} (${r.updatedAt ? `${Math.round((Date.now() - r.updatedAt) / 60000)}m ago` : "no activity"})`);
runtime.log(
`- ${r.key} (${r.updatedAt ? `${Math.round((Date.now() - r.updatedAt) / 60000)}m ago` : "no activity"})`,
);
}
}
runtime.log(info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`));
runtime.log(
info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`),
);
}
if (fatal) {

View File

@@ -55,9 +55,9 @@ describe("loginWeb coverage", () => {
output: { statusCode: DisconnectReason.loggedOut },
});
await expect(loginWeb(false, "web", waitForWaConnection as never)).rejects.toThrow(
/cache cleared/i,
);
await expect(
loginWeb(false, "web", waitForWaConnection as never),
).rejects.toThrow(/cache cleared/i);
expect(rmMock).toHaveBeenCalledWith("/tmp/wa-creds", {
recursive: true,
force: true,
@@ -66,9 +66,9 @@ describe("loginWeb coverage", () => {
it("formats and rethrows generic errors", async () => {
waitForWaConnection.mockRejectedValueOnce(new Error("boom"));
await expect(loginWeb(false, "web", waitForWaConnection as never)).rejects.toThrow(
"formatted:Error: boom",
);
await expect(
loginWeb(false, "web", waitForWaConnection as never),
).rejects.toThrow("formatted:Error: boom");
expect(formatError).toHaveBeenCalled();
});
});