From 63f5fa47deb6d2f193d7b98634dc0ecafecd77e6 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Fri, 9 Jan 2026 14:19:25 +0100
Subject: [PATCH] fix: avoid invalid UTF-16 in truncation (#567)

Cutting a string at an arbitrary UTF-16 code-unit offset can split a
surrogate pair and leave a lone surrogate in the result. Add
sliceUtf16Safe and truncateUtf16Safe to src/utils.ts and use them in the
agent, cron, Discord and iMessage truncation paths.

truncateMiddle anchors its tail at str.length - half rather than -half,
so a zero-width half still yields an empty tail.
---
Review notes (not part of the commit message):
- truncateMiddle: the tail slice starts at str.length - half, not -half.
- src/utils.ts: TSDoc added to the new helpers. Hunk sizes and the
  diffstat below were updated to match; the index blob ids were not
  regenerated.

 src/agents/bash-tools.ts            |  5 +-
 src/agents/pi-embedded-subscribe.ts |  3 +-
 src/cron/isolated-agent.ts          |  4 +-
 src/cron/service.ts                 |  3 +-
 src/discord/monitor.ts              |  6 ++-
 src/imessage/monitor.ts             |  3 +-
 src/utils.ts                        | 79 +++++++++++++++++++++++++++++
 7 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/src/agents/bash-tools.ts b/src/agents/bash-tools.ts
index bb4aff4c5..b71d1c5ac 100644
--- a/src/agents/bash-tools.ts
+++ b/src/agents/bash-tools.ts
@@ -8,6 +8,7 @@ import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
 import { Type } from "@sinclair/typebox";
 
 import { logInfo } from "../logger.js";
+import { sliceUtf16Safe } from "../utils.js";
 import {
   addSession,
   appendOutput,
@@ -1041,7 +1042,9 @@ function chunkString(input: string, limit = CHUNK_LIMIT) {
 function truncateMiddle(str: string, max: number) {
   if (str.length <= max) return str;
   const half = Math.floor((max - 3) / 2);
-  return `${str.slice(0, half)}...${str.slice(str.length - half)}`;
+  // Anchor the tail at an explicit offset: with half === 0 a start of -0
+  // is not negative, so it would select the whole string as the tail.
+  return `${sliceUtf16Safe(str, 0, half)}...${sliceUtf16Safe(str, str.length - half)}`;
 }
 
 function sliceLogLines(
diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts
index 16643d6fc..3f57c0288 100644
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -9,6 +9,7 @@ import { resolveStateDir } from "../config/paths.js";
 import { emitAgentEvent } from "../infra/agent-events.js";
 import { createSubsystemLogger } from "../logging.js";
 import { splitMediaFromOutput } from "../media/parse.js";
+import { truncateUtf16Safe } from "../utils.js";
 import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
 import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
 import { isMessagingToolDuplicate } from "./pi-embedded-helpers.js";
@@ -64,7 +65,7 @@ type MessagingToolSend = {
 
 function truncateToolText(text: string): string {
   if (text.length <= TOOL_RESULT_MAX_CHARS) return text;
-  return `${text.slice(0, TOOL_RESULT_MAX_CHARS)}\n…(truncated)…`;
+  return `${truncateUtf16Safe(text, TOOL_RESULT_MAX_CHARS)}\n…(truncated)…`;
 }
 
 function sanitizeToolResult(result: unknown): unknown {
diff --git a/src/cron/isolated-agent.ts b/src/cron/isolated-agent.ts
index 38d23d351..babfba271 100644
--- a/src/cron/isolated-agent.ts
+++ b/src/cron/isolated-agent.ts
@@ -49,7 +49,7 @@ import {
 import { registerAgentRunContext } from "../infra/agent-events.js";
 import { parseTelegramTarget } from "../telegram/targets.js";
 import { resolveTelegramToken } from "../telegram/token.js";
-import { normalizeE164 } from "../utils.js";
+import { normalizeE164, truncateUtf16Safe } from "../utils.js";
 import type { CronJob } from "./types.js";
 
 export type RunCronAgentTurnResult = {
@@ -68,7 +68,7 @@ function pickSummaryFromOutput(text: string | undefined) {
   const clean = (text ?? "").trim();
   if (!clean) return undefined;
   const limit = 2000;
-  return clean.length > limit ? `${clean.slice(0, limit)}…` : clean;
+  return clean.length > limit ? `${truncateUtf16Safe(clean, limit)}…` : clean;
 }
 
 function pickSummaryFromPayloads(
diff --git a/src/cron/service.ts b/src/cron/service.ts
index a75cc9ae6..f1e40fdd2 100644
--- a/src/cron/service.ts
+++ b/src/cron/service.ts
@@ -1,5 +1,6 @@
 import crypto from "node:crypto";
 
+import { truncateUtf16Safe } from "../utils.js";
 import { computeNextRunAtMs } from "./schedule.js";
 import { loadCronStore, saveCronStore } from "./store.js";
 import type {
@@ -61,7 +62,7 @@ function normalizeOptionalText(raw: unknown) {
 
 function truncateText(input: string, maxLen: number) {
   if (input.length <= maxLen) return input;
-  return `${input.slice(0, Math.max(0, maxLen - 1)).trimEnd()}…`;
+  return `${truncateUtf16Safe(input, Math.max(0, maxLen - 1)).trimEnd()}…`;
 }
 
 function inferLegacyName(job: {
diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts
index 5fa9426c2..00ddc1f07 100644
--- a/src/discord/monitor.ts
+++ b/src/discord/monitor.ts
@@ -61,6 +61,7 @@ import {
 } from "../routing/resolve-route.js";
 import { resolveThreadSessionKeys } from "../routing/session-key.js";
 import type { RuntimeEnv } from "../runtime.js";
+import { truncateUtf16Safe } from "../utils.js";
 import { loadWebMedia } from "../web/media.js";
 import { resolveDiscordAccount } from "./accounts.js";
 import { chunkDiscordText } from "./chunk.js";
@@ -1017,7 +1018,10 @@ export function createDiscordMessageHandler(params: {
     }
 
     if (shouldLogVerbose()) {
-      const preview = combinedBody.slice(0, 200).replace(/\n/g, "\\n");
+      const preview = truncateUtf16Safe(combinedBody, 200).replace(
+        /\n/g,
+        "\\n",
+      );
       logVerbose(
         `discord inbound: channel=${message.channelId} from=${ctxPayload.From} preview="${preview}"`,
       );
diff --git a/src/imessage/monitor.ts b/src/imessage/monitor.ts
index 44db2e1ba..8cf635989 100644
--- a/src/imessage/monitor.ts
+++ b/src/imessage/monitor.ts
@@ -24,6 +24,7 @@ import {
 } from "../pairing/pairing-store.js";
 import { resolveAgentRoute } from "../routing/resolve-route.js";
 import type { RuntimeEnv } from "../runtime.js";
+import { truncateUtf16Safe } from "../utils.js";
 import { resolveIMessageAccount } from "./accounts.js";
 import { createIMessageRpcClient } from "./client.js";
 import { sendMessageIMessage } from "./send.js";
@@ -413,7 +414,7 @@ export async function monitorIMessageProvider(
     }
 
     if (shouldLogVerbose()) {
-      const preview = body.slice(0, 200).replace(/\n/g, "\\n");
+      const preview = truncateUtf16Safe(body, 200).replace(/\n/g, "\\n");
       logVerbose(
         `imessage inbound: chatId=${chatId ?? "unknown"} from=${ctxPayload.From} len=${body.length} preview="${preview}"`,
       );
diff --git a/src/utils.ts b/src/utils.ts
index d10ee478c..0ddfc6ccf 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -95,6 +95,85 @@ export function sleep(ms: number) {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+/** Reports whether `codeUnit` is a UTF-16 high (leading) surrogate. */
+function isHighSurrogate(codeUnit: number): boolean {
+  return codeUnit >= 0xd800 && codeUnit <= 0xdbff;
+}
+
+/** Reports whether `codeUnit` is a UTF-16 low (trailing) surrogate. */
+function isLowSurrogate(codeUnit: number): boolean {
+  return codeUnit >= 0xdc00 && codeUnit <= 0xdfff;
+}
+
+/**
+ * Like `String.prototype.slice`, but never cuts a surrogate pair in half.
+ *
+ * Negative indices count back from the end and out-of-range indices are
+ * clamped to the string. Unlike the native method, reversed bounds are
+ * swapped rather than producing an empty string. If `start` lands on the low
+ * half of a pair it moves forward one unit, and if `end` lands right after
+ * the high half of a pair it moves back one unit, so a straddling pair is
+ * dropped entirely instead of being emitted as a lone surrogate.
+ *
+ * @param input - String to slice.
+ * @param start - Start offset in code units; negative counts from the end.
+ * @param end - End offset (exclusive); defaults to the end of `input`.
+ * @returns The requested substring, shrunk where needed to keep pairs intact.
+ */
+export function sliceUtf16Safe(
+  input: string,
+  start: number,
+  end?: number,
+): string {
+  const len = input.length;
+
+  let from = start < 0 ? Math.max(len + start, 0) : Math.min(start, len);
+  let to =
+    end === undefined
+      ? len
+      : end < 0
+        ? Math.max(len + end, 0)
+        : Math.min(end, len);
+
+  if (to < from) {
+    const tmp = from;
+    from = to;
+    to = tmp;
+  }
+
+  if (from > 0 && from < len) {
+    const codeUnit = input.charCodeAt(from);
+    if (
+      isLowSurrogate(codeUnit) &&
+      isHighSurrogate(input.charCodeAt(from - 1))
+    ) {
+      from += 1;
+    }
+  }
+
+  if (to > 0 && to < len) {
+    const codeUnit = input.charCodeAt(to - 1);
+    if (isHighSurrogate(codeUnit) && isLowSurrogate(input.charCodeAt(to))) {
+      to -= 1;
+    }
+  }
+
+  return input.slice(from, to);
+}
+
+/**
+ * Returns at most `maxLen` UTF-16 code units from the start of `input`
+ * without splitting a surrogate pair at the cut.
+ *
+ * `maxLen` is floored and negative values are treated as 0. The result can
+ * be one unit shorter than the limit when the cut would split a pair.
+ */
+export function truncateUtf16Safe(input: string, maxLen: number): string {
+  const limit = Math.max(0, Math.floor(maxLen));
+  if (input.length <= limit) return input;
+  return sliceUtf16Safe(input, 0, limit);
+}
+
 export function resolveUserPath(input: string): string {
   const trimmed = input.trim();
   if (!trimmed) return trimmed;