diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts
index 7b2279391..b21477e71 100644
--- a/src/agents/pi-tools.ts
+++ b/src/agents/pi-tools.ts
@@ -1,9 +1,10 @@
 import type { AgentTool, AgentToolResult } from "@mariozechner/pi-ai";
 import { bashTool, codingTools, readTool } from "@mariozechner/pi-coding-agent";
-import type { TSchema } from "@sinclair/typebox";
+import { Type, type TSchema } from "@sinclair/typebox";
 
 import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
 import { detectMime } from "../media/mime.js";
+import { startWebLoginWithQr, waitForWebLogin } from "../web/login-qr.js";
 
 // TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection
 // for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly).
@@ -103,6 +104,67 @@ function normalizeReadImageResult(
 
 type AnyAgentTool = AgentTool;
 
+/**
+ * Agent tool that drives the WhatsApp Web QR login: `start` returns a QR code
+ * as a markdown image, `wait` blocks until the scan completes or times out.
+ * A missing `action` is treated as `start`.
+ */
+function createWhatsAppLoginTool(): AnyAgentTool {
+  return {
+    label: "WhatsApp Login",
+    name: "whatsapp_login",
+    description:
+      "Generate a WhatsApp QR code for linking, or wait for the scan to complete.",
+    parameters: Type.Object({
+      action: Type.Union([Type.Literal("start"), Type.Literal("wait")]),
+      timeoutMs: Type.Optional(Type.Number()),
+      force: Type.Optional(Type.Boolean()),
+    }),
+    execute: async (_toolCallId, args) => {
+      // Narrow the arguments once; a missing `args` is treated as `{}`.
+      const params = (args ?? {}) as {
+        action?: string;
+        timeoutMs?: unknown;
+        force?: unknown;
+      };
+      const timeoutMs =
+        typeof params.timeoutMs === "number" ? params.timeoutMs : undefined;
+
+      if ((params.action ?? "start") === "wait") {
+        const result = await waitForWebLogin({ timeoutMs });
+        return {
+          content: [{ type: "text", text: result.message }],
+          details: { connected: result.connected },
+        };
+      }
+
+      const result = await startWebLoginWithQr({
+        timeoutMs,
+        force: params.force === true,
+      });
+
+      if (!result.qrDataUrl) {
+        return {
+          content: [{ type: "text", text: result.message }],
+          details: { qr: false },
+        };
+      }
+
+      const text = [
+        result.message,
+        "",
+        "Open WhatsApp → Linked Devices and scan:",
+        "",
+        `![whatsapp-qr](${result.qrDataUrl})`,
+      ].join("\n");
+      return {
+        content: [{ type: "text", text }],
+        details: { qr: true },
+      };
+    },
+  };
+}
+
 function isImageBlock(block: unknown): block is ImageContentBlock {
   if (!block || typeof block !== "object") return false;
   const rec = block as Record;
@@ -266,11 +328,12 @@ function createClawdisBashTool(base: AnyAgentTool): AnyAgentTool {
 }
 
 export function createClawdisCodingTools(): AnyAgentTool[] {
-  return (codingTools as unknown as AnyAgentTool[]).map((tool) =>
+  const base = (codingTools as unknown as AnyAgentTool[]).map((tool) =>
     tool.name === readTool.name
       ? createClawdisReadTool(tool)
       : tool.name === bashTool.name
         ? createClawdisBashTool(tool)
         : (tool as AnyAgentTool),
   );
+  return [...base, createWhatsAppLoginTool()];
 }
diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts
index c5c154d57..a98895502 100644
--- a/src/agents/system-prompt.ts
+++ b/src/agents/system-prompt.ts
@@ -27,6 +27,7 @@ function describeBuiltInTools(): string {
     "- bash: run shell commands",
     "- edit: apply precise in-file replacements",
     "- write: create/overwrite files",
+    "- whatsapp_login: generate a WhatsApp QR code and wait for linking",
   ].join("\n");
 }
 
diff --git a/src/web/login-qr.ts b/src/web/login-qr.ts
new file mode 100644
index 000000000..35f60f597
--- /dev/null
+++ b/src/web/login-qr.ts
@@ -0,0 +1,233 @@
+import { randomUUID } from "node:crypto";
+
+import { danger, info, success } from "../globals.js";
+import { logInfo } from "../logger.js";
+import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
+import {
+  createWaSocket,
+  formatError,
+  readWebSelfId,
+  waitForWaConnection,
+  webAuthExists,
+} from "./session.js";
+import { renderQrPngBase64 } from "./qr-image.js";
+
+type WaSocket = Awaited<ReturnType<typeof createWaSocket>>;
+
+/** State for the single QR login attempt this module tracks at a time. */
+type ActiveLogin = {
+  id: string;
+  sock: WaSocket;
+  startedAt: number;
+  qr?: string;
+  qrDataUrl?: string;
+  connected: boolean;
+  error?: string;
+  /** Has a catch handler attached; failures are recorded in `error` instead. */
+  waitPromise: Promise<void>;
+};
+
+/** A login older than this is treated as expired. */
+const ACTIVE_LOGIN_TTL_MS = 3 * 60_000;
+let activeLogin: ActiveLogin | null = null;
+
+/** Best-effort close of the socket's underlying websocket; errors are ignored. */
+function closeSocket(sock: WaSocket) {
+  try {
+    sock.ws?.close();
+  } catch {
+    // ignore
+  }
+}
+
+/** Closes and forgets the tracked login, optionally logging why. */
+async function resetActiveLogin(reason?: string) {
+  if (activeLogin) {
+    closeSocket(activeLogin.sock);
+    activeLogin = null;
+  }
+  if (reason) {
+    logInfo(reason);
+  }
+}
+
+/** True while the login is younger than `ACTIVE_LOGIN_TTL_MS`. */
+function isLoginFresh(login: ActiveLogin) {
+  return Date.now() - login.startedAt < ACTIVE_LOGIN_TTL_MS;
+}
+
+/**
+ * Starts a WhatsApp Web login and returns the first QR code as a PNG data URL.
+ *
+ * Returns only a message (no QR) when auth already exists and `force` is not
+ * set, or when the socket or the QR could not be obtained. A fresh login that
+ * already has a QR is reused instead of opening a second socket.
+ *
+ * @param opts.timeoutMs How long to wait for the QR; values below 5000 ms are
+ *   raised to 5000 ms; the default is 30000 ms.
+ * @param opts.force Start a login even when stored auth already exists.
+ */
+export async function startWebLoginWithQr(
+  opts: {
+    verbose?: boolean;
+    timeoutMs?: number;
+    force?: boolean;
+    runtime?: RuntimeEnv;
+  } = {},
+): Promise<{ qrDataUrl?: string; message: string }> {
+  const runtime = opts.runtime ?? defaultRuntime;
+  const hasWeb = await webAuthExists();
+  if (hasWeb && !opts.force) {
+    // Only read the stored identity on the path that reports it.
+    const selfId = readWebSelfId();
+    const who = selfId.e164 ?? selfId.jid ?? "unknown";
+    return {
+      message: `WhatsApp is already linked (${who}). Say “relink” if you want a fresh QR.`,
+    };
+  }
+
+  if (activeLogin && isLoginFresh(activeLogin) && activeLogin.qrDataUrl) {
+    return {
+      qrDataUrl: activeLogin.qrDataUrl,
+      message: "QR already active. Scan it in WhatsApp → Linked Devices.",
+    };
+  }
+
+  await resetActiveLogin();
+
+  let resolveQr: ((qr: string) => void) | null = null;
+  let rejectQr: ((err: Error) => void) | null = null;
+  const qrPromise = new Promise<string>((resolve, reject) => {
+    resolveQr = resolve;
+    rejectQr = reject;
+  });
+  // The timer below can reject before `qrPromise` is awaited (socket creation
+  // may outlast the timeout); a handler must already be attached or Node
+  // reports an unhandled rejection. The later `await` still sees the error.
+  void qrPromise.catch(() => {});
+
+  const qrTimer = setTimeout(() => {
+    rejectQr?.(new Error("Timed out waiting for WhatsApp QR"));
+  }, Math.max(opts.timeoutMs ?? 30_000, 5000));
+
+  let sock: WaSocket;
+  try {
+    sock = await createWaSocket(false, Boolean(opts.verbose), {
+      onQr: (qr: string) => {
+        // Only the first QR is used; later QR events are ignored.
+        if (!activeLogin || activeLogin.qr) return;
+        activeLogin.qr = qr;
+        clearTimeout(qrTimer);
+        runtime.log(info("WhatsApp QR received."));
+        resolveQr?.(qr);
+      },
+    });
+  } catch (err) {
+    clearTimeout(qrTimer);
+    await resetActiveLogin();
+    return {
+      message: `Failed to start WhatsApp login: ${String(err)}`,
+    };
+  }
+  const login: ActiveLogin = {
+    id: randomUUID(),
+    sock,
+    startedAt: Date.now(),
+    connected: false,
+    waitPromise: Promise.resolve(),
+  };
+  activeLogin = login;
+
+  // Record the outcome on the login, but only while it is still the tracked one.
+  login.waitPromise = waitForWaConnection(sock)
+    .then(() => {
+      if (activeLogin?.id === login.id) {
+        activeLogin.connected = true;
+      }
+    })
+    .catch((err) => {
+      if (activeLogin?.id === login.id) {
+        activeLogin.error = formatError(err);
+      }
+    });
+
+  let qr: string;
+  try {
+    qr = await qrPromise;
+  } catch (err) {
+    clearTimeout(qrTimer);
+    await resetActiveLogin();
+    return {
+      message: `Failed to get QR: ${String(err)}`,
+    };
+  }
+
+  const base64 = await renderQrPngBase64(qr);
+  login.qrDataUrl = `data:image/png;base64,${base64}`;
+  return {
+    qrDataUrl: login.qrDataUrl,
+    message: "Scan this QR in WhatsApp → Linked Devices.",
+  };
+}
+
+/**
+ * Waits for the tracked login to finish linking, up to `timeoutMs`.
+ *
+ * @param opts.timeoutMs How long to wait; values below 1000 ms are raised to
+ *   1000 ms; the default is 120000 ms.
+ * @returns `connected: true` only when the connection wait resolved; otherwise
+ *   a message saying why not (no login, expired, still waiting, failed).
+ */
+export async function waitForWebLogin(
+  opts: { timeoutMs?: number; runtime?: RuntimeEnv } = {},
+): Promise<{ connected: boolean; message: string }> {
+  const runtime = opts.runtime ?? defaultRuntime;
+  if (!activeLogin) {
+    return { connected: false, message: "No active WhatsApp login in progress." };
+  }
+
+  const login = activeLogin;
+  if (!isLoginFresh(login)) {
+    await resetActiveLogin();
+    return {
+      connected: false,
+      message: "The login QR expired. Ask me to generate a new one.",
+    };
+  }
+  const timeoutMs = Math.max(opts.timeoutMs ?? 120_000, 1000);
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<"timeout">((resolve) => {
+    timer = setTimeout(() => resolve("timeout"), timeoutMs);
+  });
+  const result = await Promise.race([
+    login.waitPromise.then(() => "done" as const),
+    timeout,
+  ]).finally(() => {
+    // Without this the timer keeps running (and keeps the process alive) for
+    // the full timeout even after the login has already finished.
+    clearTimeout(timer);
+  });
+
+  if (result === "timeout") {
+    return {
+      connected: false,
+      message: "Still waiting for the QR scan. Let me know when you’ve scanned it.",
+    };
+  }
+
+  if (login.error) {
+    const message = `WhatsApp login failed: ${login.error}`;
+    await resetActiveLogin(message);
+    runtime.log(danger(message));
+    return { connected: false, message };
+  }
+
+  if (login.connected) {
+    const message = "✅ Linked! WhatsApp is ready.";
+    runtime.log(success(message));
+    await resetActiveLogin();
+    return { connected: true, message };
+  }
+
+  return { connected: false, message: "Login ended without a connection." };
+}
diff --git a/src/web/qr-image.test.ts b/src/web/qr-image.test.ts
new file mode 100644
index 000000000..157025025
--- /dev/null
+++ b/src/web/qr-image.test.ts
@@ -0,0 +1,11 @@
+import { describe, expect, it } from "vitest";
+
+import { renderQrPngBase64 } from "./qr-image.js";
+
+describe("renderQrPngBase64", () => {
+  it("renders a PNG data payload", async () => {
+    const b64 = await renderQrPngBase64("clawdis");
+    const buf = Buffer.from(b64, "base64");
+    expect(buf.subarray(0, 8).toString("hex")).toBe("89504e470d0a1a0a");
+  });
+});
diff --git a/src/web/qr-image.ts b/src/web/qr-image.ts
new file mode 100644
index 000000000..7c3243700
--- /dev/null
+++ b/src/web/qr-image.ts
@@ -0,0 +1,160 @@
+import { createRequire } from "node:module";
+import { deflateSync } from "node:zlib";
+
+type QRCodeConstructor = new (
+  typeNumber: number,
+  errorCorrectLevel: unknown,
+) => {
+  addData: (data: string) => void;
+  make: () => void;
+  getModuleCount: () => number;
+  isDark: (row: number, col: number) => boolean;
+};
+
+// The QR encoder is loaded through `require` from files under the
+// `qrcode-terminal` package; the casts describe only the members used here.
+const require = createRequire(import.meta.url);
+const QRCode = require(
+  "qrcode-terminal/vendor/QRCode",
+) as QRCodeConstructor;
+const QRErrorCorrectLevel = require(
+  "qrcode-terminal/vendor/QRCode/QRErrorCorrectLevel",
+) as Record<string, unknown>;
+
+/** Builds the QR module matrix for `input` at error-correction level `L`. */
+function createQrMatrix(input: string) {
+  // NOTE(review): -1 presumably lets the encoder pick the QR version — confirm.
+  const qr = new QRCode(-1, QRErrorCorrectLevel.L);
+  qr.addData(input);
+  qr.make();
+  return qr;
+}
+
+/** Writes one RGBA pixel at (x, y) into a row-major buffer `width` pixels wide. */
+function fillPixel(
+  buf: Buffer,
+  x: number,
+  y: number,
+  width: number,
+  r: number,
+  g: number,
+  b: number,
+  a = 255,
+) {
+  const idx = (y * width + x) * 4;
+  buf[idx] = r;
+  buf[idx + 1] = g;
+  buf[idx + 2] = b;
+  buf[idx + 3] = a;
+}
+
+/** Precomputes the 256-entry lookup table used by `crc32`. */
+function crcTable() {
+  const table = new Uint32Array(256);
+  for (let i = 0; i < 256; i += 1) {
+    let c = i;
+    for (let k = 0; k < 8; k += 1) {
+      c = c & 1 ? 0xedb88320 ^ (c >>> 1) : c >>> 1;
+    }
+    table[i] = c >>> 0;
+  }
+  return table;
+}
+
+const CRC_TABLE = crcTable();
+
+/** Table-driven CRC of `buf`, returned as an unsigned 32-bit integer. */
+function crc32(buf: Buffer) {
+  let crc = 0xffffffff;
+  for (let i = 0; i < buf.length; i += 1) {
+    crc = CRC_TABLE[(crc ^ buf[i]) & 0xff] ^ (crc >>> 8);
+  }
+  return (crc ^ 0xffffffff) >>> 0;
+}
+
+/** Frames `data` as a PNG chunk: length, type, data, CRC over type + data. */
+function pngChunk(type: string, data: Buffer) {
+  const typeBuf = Buffer.from(type, "ascii");
+  const len = Buffer.alloc(4);
+  len.writeUInt32BE(data.length, 0);
+  const crc = crc32(Buffer.concat([typeBuf, data]));
+  const crcBuf = Buffer.alloc(4);
+  crcBuf.writeUInt32BE(crc, 0);
+  return Buffer.concat([len, typeBuf, data, crcBuf]);
+}
+
+/** Encodes a tightly packed RGBA pixel buffer as a non-interlaced 8-bit PNG. */
+function encodePngRgba(buffer: Buffer, width: number, height: number) {
+  const stride = width * 4;
+  // Each scanline is prefixed with one filter-type byte.
+  const raw = Buffer.alloc((stride + 1) * height);
+  for (let row = 0; row < height; row += 1) {
+    const rawOffset = row * (stride + 1);
+    raw[rawOffset] = 0; // filter: none
+    buffer.copy(raw, rawOffset + 1, row * stride, row * stride + stride);
+  }
+  const compressed = deflateSync(raw);
+
+  const signature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
+  const ihdr = Buffer.alloc(13);
+  ihdr.writeUInt32BE(width, 0);
+  ihdr.writeUInt32BE(height, 4);
+  ihdr[8] = 8; // bit depth
+  ihdr[9] = 6; // color type RGBA
+  ihdr[10] = 0; // compression
+  ihdr[11] = 0; // filter
+  ihdr[12] = 0; // interlace
+
+  return Buffer.concat([
+    signature,
+    pngChunk("IHDR", ihdr),
+    pngChunk("IDAT", compressed),
+    pngChunk("IEND", Buffer.alloc(0)),
+  ]);
+}
+
+/**
+ * Renders `input` as a black-on-white QR code and returns the PNG as base64.
+ *
+ * @param opts.scale Pixels per QR module (positive integer, default 6).
+ * @param opts.marginModules Margin width in modules (non-negative integer, default 4).
+ * @throws RangeError when `scale` or `marginModules` is out of range or not an integer.
+ */
+export async function renderQrPngBase64(
+  input: string,
+  opts: { scale?: number; marginModules?: number } = {},
+): Promise<string> {
+  const { scale = 6, marginModules = 4 } = opts;
+  // Reject sizes the pixel math below cannot represent (fractional, zero, negative).
+  if (!Number.isInteger(scale) || scale < 1) {
+    throw new RangeError(`scale must be a positive integer, got ${scale}`);
+  }
+  if (!Number.isInteger(marginModules) || marginModules < 0) {
+    throw new RangeError(
+      `marginModules must be a non-negative integer, got ${marginModules}`,
+    );
+  }
+  const qr = createQrMatrix(input);
+  const modules = qr.getModuleCount();
+  const size = (modules + marginModules * 2) * scale;
+
+  // Start from an all-white, fully opaque canvas (every byte 255).
+  const buf = Buffer.alloc(size * size * 4, 255);
+  for (let row = 0; row < modules; row += 1) {
+    for (let col = 0; col < modules; col += 1) {
+      if (!qr.isDark(row, col)) continue;
+      const startX = (col + marginModules) * scale;
+      const startY = (row + marginModules) * scale;
+      for (let y = 0; y < scale; y += 1) {
+        const pixelY = startY + y;
+        for (let x = 0; x < scale; x += 1) {
+          const pixelX = startX + x;
+          fillPixel(buf, pixelX, pixelY, size, 0, 0, 0, 255);
+        }
+      }
+    }
+  }
+
+  const png = encodePngRgba(buf, size, size);
+  return png.toString("base64");
+}
diff --git a/src/web/session.ts b/src/web/session.ts
index 9ad32575e..79462e1ff 100644
--- a/src/web/session.ts
+++ b/src/web/session.ts
@@ -83,7 +83,11 @@ async function safeSaveCreds(
  * Create a Baileys socket backed by the multi-file auth store we keep on disk.
  * Consumers can opt into QR printing for interactive login flows.
  */
-export async function createWaSocket(printQr: boolean, verbose: boolean) {
+export async function createWaSocket(
+  printQr: boolean,
+  verbose: boolean,
+  opts: { onQr?: (qr: string) => void } = {},
+) {
   const baseLogger = getChildLogger(
     { module: "baileys" },
     {
@@ -115,9 +119,12 @@ export async function createWaSocket(printQr: boolean, verbose: boolean) {
     (update: Partial) => {
       try {
         const { connection, lastDisconnect, qr } = update;
-        if (qr && printQr) {
-          console.log("Scan this QR in WhatsApp (Linked Devices):");
-          qrcode.generate(qr, { small: true });
+        if (qr) {
+          opts.onQr?.(qr);
+          if (printQr) {
+            console.log("Scan this QR in WhatsApp (Linked Devices):");
+            qrcode.generate(qr, { small: true });
+          }
         }
         if (connection === "close") {
           const status = getStatusCode(lastDisconnect?.error);