Auto-reply: smarter chunking breaks

2025-12-03 00:25:01 +00:00
parent ec46932259
commit b6c45485bc
11 changed files with 239 additions and 50 deletions
--- a/src/auto-reply/chunk.test.ts
+++ b/src/auto-reply/chunk.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from "vitest";
+
+import { chunkText } from "./chunk.js";
+
+describe("chunkText", () => {
+  it("keeps multi-line text in one chunk when under limit", () => {
+    const text = "Line one\n\nLine two\n\nLine three"; // 30 chars, far below the limit
+    const chunks = chunkText(text, 1600);
+    expect(chunks).toEqual([text]); // embedded newlines alone must not cause a split
+  });
+
+  it("splits only when text exceeds the limit", () => {
+    const part = "a".repeat(20);
+    const text = part.repeat(5); // 100 chars
+    const chunks = chunkText(text, 60); // no whitespace anywhere, so the cut is a hard break
+    expect(chunks.length).toBe(2);
+    expect(chunks[0].length).toBe(60);
+    expect(chunks[1].length).toBe(40);
+    expect(chunks.join("")).toBe(text); // hard breaks lose no characters
+  });
+
+  it("prefers breaking at a newline before the limit", () => {
+    const text = `paragraph one line\n\nparagraph two starts here and continues`;
+    const chunks = chunkText(text, 40);
+    expect(chunks).toEqual([
+      "paragraph one line", // the blank-line separator itself is dropped
+      "paragraph two starts here and continues",
+    ]);
+  });
+
+  it("otherwise breaks at the last whitespace under the limit", () => {
+    const text = "This is a message that should break nicely near a word boundary.";
+    const chunks = chunkText(text, 30);
+    expect(chunks[0].length).toBeLessThanOrEqual(30); // only the first two chunks are size-checked
+    expect(chunks[1].length).toBeLessThanOrEqual(30);
+    expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe( // compare modulo whitespace
+      text.replace(/\s+/g, " ").trim(),
+    );
+  });
+
+  it("falls back to a hard break when no whitespace is present", () => {
+    const text = "Supercalifragilisticexpialidocious"; // 34 chars
+    const chunks = chunkText(text, 10);
+    expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
+  });
+});
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -0,0 +1,48 @@
+// Utilities for splitting outbound text into platform-sized chunks without
+// unintentionally breaking on newlines. Text at or under the limit is returned
+// whole; longer text is cut at a newline, else whitespace, else the limit.
+
+export function chunkText(text: string, limit: number): string[] {
+  if (!text) return []; // empty input yields no chunks at all
+  if (limit <= 0) return [text]; // non-positive limit: hand the text back unsplit
+  if (text.length <= limit) return [text]; // already fits: returned untouched, newlines included
+
+  const chunks: string[] = [];
+  let remaining = text;
+
+  while (remaining.length > limit) { // NOTE(review): assumes an integer limit; a fractional limit below 1 never advances
+    const window = remaining.slice(0, limit); // candidate span, measured in UTF-16 code units
+
+    // 1) Prefer the last newline inside the window (a hit at index 0 is not usable).
+    let breakIdx = window.lastIndexOf("\n");
+
+    // 2) Otherwise prefer the last whitespace (word boundary) inside the window.
+    if (breakIdx <= 0) {
+      for (let i = window.length - 1; i >= 0; i--) {
+        if (/\s/.test(window[i])) {
+          breakIdx = i;
+          break;
+        }
+      }
+    }
+
+    // 3) Fallback: hard break exactly at the limit.
+    if (breakIdx <= 0) breakIdx = limit; // NOTE(review): code-unit cut, so it can split a surrogate pair
+
+    const rawChunk = remaining.slice(0, breakIdx);
+    const chunk = rawChunk.trimEnd(); // drop whitespace left just before the break
+    if (chunk.length > 0) { // a whitespace-only span is discarded rather than emitted
+      chunks.push(chunk);
+    }
+
+    // Skip the separator at the break point, if any (a hard break may land on one too).
+    const brokeOnSeparator =
+      breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
+    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
+    remaining = remaining.slice(nextStart).trimStart(); // leading whitespace of the next chunk is dropped
+  }
+
+  if (remaining.length) chunks.push(remaining); // tail is within the limit; pushed without end-trimming
+
+  return chunks;
+}
--- a/src/auto-reply/command-reply.ts
+++ b/src/auto-reply/command-reply.ts
@@ -255,11 +255,11 @@ export async function runCommandReply(
    }

    const parsed = trimmed ? agent.parseOutput(trimmed) : undefined;
+    const parserProvided = !!parsed;

    // Collect one message per assistant text from parseOutput (tau RPC can emit many).
    const parsedTexts =
-      parsed?.texts?.map((t) => t.trim()).filter(Boolean) ??
-      (parsed?.text ? [parsed.text.trim()] : []);
+      parsed?.texts?.map((t) => t.trim()).filter(Boolean) ?? [];

    type ReplyItem = { text: string; media?: string[] };
    const replyItems: ReplyItem[] = [];
@@ -274,7 +274,7 @@ export async function runCommandReply(
    }

    // If parser gave nothing, fall back to raw stdout as a single message.
-    if (replyItems.length === 0 && trimmed) {
+    if (replyItems.length === 0 && trimmed && !parserProvided) {
      const { text: cleanedText, mediaUrls: mediaFound } =
        splitMediaFromOutput(trimmed);
      if (cleanedText || mediaFound?.length) {
@@ -401,7 +401,7 @@ export async function runCommandReply(
    }

    verboseLog(`Command auto-reply meta: ${JSON.stringify(meta)}`);
-    return { payloads, meta };
+    return { payloads, payload: payloads[0], meta };
  } catch (err) {
    const elapsed = Date.now() - started;
    logger.info(
--- a/src/auto-reply/reply.chunking.test.ts
+++ b/src/auto-reply/reply.chunking.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it, vi } from "vitest";
+
+import type { WarelayConfig } from "../config/config.js";
+import { autoReplyIfConfigured } from "./reply.js";
+
+describe("autoReplyIfConfigured chunking", () => {
+  it("sends a single Twilio message for multi-line text under limit", async () => {
+    const body = [ // multi-line reply with blank lines and an emoji
+      "Oh! Hi Peter! 🦞",
+      "",
+      "Sorry, I got a bit trigger-happy with the heartbeat response there. What's up?",
+      "",
+      "Everything working on your end?",
+    ].join("\n");
+
+    const config: WarelayConfig = {
+      inbound: {
+        reply: {
+          mode: "text", // static text reply
+          text: body,
+        },
+      },
+    };
+
+    const create = vi.fn().mockResolvedValue({}); // mock standing in for client.messages.create
+    const client = { messages: { create } } as unknown as Parameters< // cast: only messages.create is supplied
+      typeof autoReplyIfConfigured
+    >[0];
+
+    const message = {
+      body: "ping",
+      from: "+15551234567",
+      to: "+15557654321",
+      sid: "SM123",
+    } as Parameters<typeof autoReplyIfConfigured>[1];
+
+    await autoReplyIfConfigured(client, message, config);
+
+    expect(create).toHaveBeenCalledTimes(1); // exactly one send, i.e. the text was not chunked
+    expect(create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        body, // full text, newlines intact
+        from: message.to, // reply is sent from the inbound recipient...
+        to: message.from, // ...back to the inbound sender
+      }),
+    );
+  });
+});
+
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -1,5 +1,4 @@
 import crypto from "node:crypto";
-
 import type { MessageInstance } from "twilio/lib/rest/api/v2010/account/message.js";
 import { loadConfig, type WarelayConfig } from "../config/config.js";
 import {
@@ -18,6 +17,7 @@ import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
 import type { TwilioRequester } from "../twilio/types.js";
 import { sendTypingIndicator } from "../twilio/typing.js";
 import { runCommandReply } from "./command-reply.js";
+import { chunkText } from "./chunk.js";
 import {
  applyTemplate,
  type MsgContext,
@@ -307,7 +307,7 @@ export async function getReplyFromConfig(
      mediaUrl: reply.mediaUrl,
    };
    cleanupTyping();
-    return [result];
+    return result;
  }

  if (reply && reply.mode === "command" && reply.command?.length) {
@@ -318,7 +318,7 @@ export async function getReplyFromConfig(
      mode: "command" as const,
    };
    try {
-      const { payloads, meta } = await runCommandReply({
+      const runResult = await runCommandReply({
        reply: commandReply,
        templatingCtx,
        sendSystemOnce,
@@ -329,6 +329,17 @@ export async function getReplyFromConfig(
        timeoutSeconds,
        commandRunner,
      });
+      const payloadArray =
+        runResult.payloads ?? (runResult.payload ? [runResult.payload] : []);
+      const meta = runResult.meta;
+      const normalizedPayloads =
+        payloadArray.length === 1 ? payloadArray[0] : payloadArray;
+      if (
+        !normalizedPayloads ||
+        (Array.isArray(normalizedPayloads) && normalizedPayloads.length === 0)
+      ) {
+        return undefined;
+      }
      if (sessionCfg && sessionStore && sessionKey) {
        const returnedSessionId = meta.agentMeta?.sessionId;
        if (returnedSessionId && returnedSessionId !== sessionId) {
@@ -357,7 +368,7 @@ export async function getReplyFromConfig(
      if (meta.agentMeta && isVerbose()) {
        logVerbose(`Agent meta: ${JSON.stringify(meta.agentMeta)}`);
      }
-      return payloads;
+      return normalizedPayloads;
    } finally {
      cleanupTyping();
    }
@@ -459,10 +470,8 @@ export async function autoReplyIfConfigured(
          : [];

      const text = replyPayload.text ?? "";
-      const chunks =
-        text.length > 0
-          ? (text.match(new RegExp(`.{1,${TWILIO_TEXT_LIMIT}}`, "g")) ?? [])
-          : [""];
+      const chunks = chunkText(text, TWILIO_TEXT_LIMIT);
+      if (chunks.length === 0) chunks.push("");

      for (let i = 0; i < chunks.length; i++) {
        const body = chunks[i];