fix: avoid invalid UTF-16 in truncation (#567)

This commit is contained in:
Peter Steinberger
2026-01-09 14:19:25 +01:00
parent fd535a50d3
commit 63f5fa47de
7 changed files with 70 additions and 7 deletions

View File

@@ -8,6 +8,7 @@ import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import { logInfo } from "../logger.js";
import { sliceUtf16Safe } from "../utils.js";
import {
addSession,
appendOutput,
@@ -1041,7 +1042,7 @@ function chunkString(input: string, limit = CHUNK_LIMIT) {
/**
 * Shortens `str` to roughly `max` UTF-16 code units by keeping its head and
 * tail and replacing the middle with `...`.
 *
 * Both cuts go through `sliceUtf16Safe`, so neither the head nor the tail
 * ends up holding half of a surrogate pair.
 *
 * @param str - Text to shorten.
 * @param max - Maximum length in UTF-16 code units; strings at or below it
 *   are returned unchanged.
 * @returns `str` itself when it fits, otherwise `<head>...<tail>`. When `max`
 *   leaves no room beyond the three-character marker, the result is `...`.
 */
function truncateMiddle(str: string, max: number) {
  if (str.length <= max) return str;
  // Clamp so a tiny `max` cannot produce a negative half-width.
  const half = Math.max(0, Math.floor((max - 3) / 2));
  const head = sliceUtf16Safe(str, 0, half);
  // Use an absolute start index: `-half` would be `-0` when half is 0, which
  // resolves to the beginning of the string and returns the whole input.
  const tail = sliceUtf16Safe(str, str.length - half);
  return `${head}...${tail}`;
}
function sliceLogLines(

View File

@@ -9,6 +9,7 @@ import { resolveStateDir } from "../config/paths.js";
import { emitAgentEvent } from "../infra/agent-events.js";
import { createSubsystemLogger } from "../logging.js";
import { splitMediaFromOutput } from "../media/parse.js";
import { truncateUtf16Safe } from "../utils.js";
import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
import { isMessagingToolDuplicate } from "./pi-embedded-helpers.js";
@@ -64,7 +65,7 @@ type MessagingToolSend = {
/**
 * Caps tool result text at `TOOL_RESULT_MAX_CHARS` UTF-16 code units and
 * appends a truncation marker when anything was cut.
 *
 * The cut is made with `truncateUtf16Safe` so the kept prefix never ends on a
 * dangling high surrogate.
 *
 * @param text - Raw tool output text.
 * @returns `text` unchanged when it fits, otherwise the safe prefix followed
 *   by a newline and the `…(truncated)…` marker.
 */
function truncateToolText(text: string): string {
  if (text.length <= TOOL_RESULT_MAX_CHARS) return text;
  return `${truncateUtf16Safe(text, TOOL_RESULT_MAX_CHARS)}\n…(truncated)…`;
}
function sanitizeToolResult(result: unknown): unknown {

View File

@@ -49,7 +49,7 @@ import {
import { registerAgentRunContext } from "../infra/agent-events.js";
import { parseTelegramTarget } from "../telegram/targets.js";
import { resolveTelegramToken } from "../telegram/token.js";
import { normalizeE164 } from "../utils.js";
import { normalizeE164, truncateUtf16Safe } from "../utils.js";
import type { CronJob } from "./types.js";
export type RunCronAgentTurnResult = {
/**
 * Derives a summary string from raw output text.
 *
 * @param text - Output text; `undefined` is treated as empty.
 * @returns `undefined` when the trimmed text is empty, otherwise the trimmed
 *   text limited to 2000 UTF-16 code units without splitting a surrogate pair.
 */
function pickSummaryFromOutput(text: string | undefined) {
  const clean = (text ?? "").trim();
  if (!clean) return undefined;
  const limit = 2000;
  return clean.length > limit ? truncateUtf16Safe(clean, limit) : clean;
}
function pickSummaryFromPayloads(

View File

@@ -1,5 +1,6 @@
import crypto from "node:crypto";
import { truncateUtf16Safe } from "../utils.js";
import { computeNextRunAtMs } from "./schedule.js";
import { loadCronStore, saveCronStore } from "./store.js";
import type {
@@ -61,7 +62,7 @@ function normalizeOptionalText(raw: unknown) {
/**
 * Shortens `input` when it is longer than `maxLen` UTF-16 code units.
 *
 * The kept prefix is at most `maxLen - 1` code units, is cut without
 * splitting a surrogate pair, and has trailing whitespace removed.
 *
 * @param input - Text to shorten.
 * @param maxLen - Length above which the text is shortened.
 * @returns `input` unchanged when it fits, otherwise the trimmed prefix.
 */
function truncateText(input: string, maxLen: number) {
  if (input.length <= maxLen) return input;
  // NOTE(review): the `maxLen - 1` budget looks like it reserves room for a
  // one-character suffix, but none is appended here — confirm intent.
  return truncateUtf16Safe(input, Math.max(0, maxLen - 1)).trimEnd();
}
function inferLegacyName(job: {

View File

@@ -61,6 +61,7 @@ import {
} from "../routing/resolve-route.js";
import { resolveThreadSessionKeys } from "../routing/session-key.js";
import type { RuntimeEnv } from "../runtime.js";
import { truncateUtf16Safe } from "../utils.js";
import { loadWebMedia } from "../web/media.js";
import { resolveDiscordAccount } from "./accounts.js";
import { chunkDiscordText } from "./chunk.js";
@@ -1017,7 +1018,10 @@ export function createDiscordMessageHandler(params: {
}
if (shouldLogVerbose()) {
const preview = combinedBody.slice(0, 200).replace(/\n/g, "\\n");
const preview = truncateUtf16Safe(combinedBody, 200).replace(
/\n/g,
"\\n",
);
logVerbose(
`discord inbound: channel=${message.channelId} from=${ctxPayload.From} preview="${preview}"`,
);

View File

@@ -24,6 +24,7 @@ import {
} from "../pairing/pairing-store.js";
import { resolveAgentRoute } from "../routing/resolve-route.js";
import type { RuntimeEnv } from "../runtime.js";
import { truncateUtf16Safe } from "../utils.js";
import { resolveIMessageAccount } from "./accounts.js";
import { createIMessageRpcClient } from "./client.js";
import { sendMessageIMessage } from "./send.js";
@@ -413,7 +414,7 @@ export async function monitorIMessageProvider(
}
if (shouldLogVerbose()) {
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
const preview = truncateUtf16Safe(body, 200).replace(/\n/g, "\\n");
logVerbose(
`imessage inbound: chatId=${chatId ?? "unknown"} from=${ctxPayload.From} len=${body.length} preview="${preview}"`,
);

View File

@@ -95,6 +95,61 @@ export function sleep(ms: number) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Reports whether a UTF-16 code unit falls in the high (leading) surrogate
 * range 0xD800–0xDBFF, inclusive.
 */
function isHighSurrogate(codeUnit: number): boolean {
  if (codeUnit < 0xd800) return false;
  return codeUnit <= 0xdbff;
}
/**
 * Reports whether a UTF-16 code unit falls in the low (trailing) surrogate
 * range 0xDC00–0xDFFF, inclusive.
 */
function isLowSurrogate(codeUnit: number): boolean {
  if (codeUnit < 0xdc00) return false;
  return codeUnit <= 0xdfff;
}
/**
 * Extracts a section of `input` like `String.prototype.slice`, but moves a
 * boundary inward when it would fall between the two halves of a surrogate
 * pair.
 *
 * Negative indices count back from the end of the string and every index is
 * clamped to `[0, input.length]`. If the resolved end precedes the resolved
 * start, the two are swapped before slicing.
 *
 * @param input - String to slice.
 * @param start - Start index in UTF-16 code units (may be negative).
 * @param end - Optional end index (exclusive, may be negative); defaults to
 *   the end of the string.
 * @returns The selected substring, without a leading lone low surrogate or a
 *   trailing lone high surrogate produced by the cut.
 */
export function sliceUtf16Safe(
  input: string,
  start: number,
  end?: number,
): string {
  const size = input.length;

  // Turn a possibly negative index into an offset within [0, size].
  const resolve = (index: number): number =>
    index < 0 ? Math.max(size + index, 0) : Math.min(index, size);

  let lo = resolve(start);
  let hi = end === undefined ? size : resolve(end);

  if (hi < lo) {
    [lo, hi] = [hi, lo];
  }

  // True when `boundary` is an interior offset sitting between a high
  // surrogate and the low surrogate that follows it.
  const splitsPair = (boundary: number): boolean =>
    boundary > 0 &&
    boundary < size &&
    isLowSurrogate(input.charCodeAt(boundary)) &&
    isHighSurrogate(input.charCodeAt(boundary - 1));

  // Skip the orphaned low surrogate at the start of the range.
  if (splitsPair(lo)) {
    lo += 1;
  }

  // Drop the orphaned high surrogate at the end of the range.
  if (splitsPair(hi)) {
    hi -= 1;
  }

  return input.slice(lo, hi);
}
/**
 * Returns at most `maxLen` leading UTF-16 code units of `input`, delegating
 * the cut to `sliceUtf16Safe` so it does not split a surrogate pair.
 *
 * @param input - String to truncate.
 * @param maxLen - Maximum length; floored and clamped to be non-negative.
 * @returns `input` unchanged when it already fits, otherwise its safe prefix.
 */
export function truncateUtf16Safe(input: string, maxLen: number): string {
  const cap = Math.max(0, Math.floor(maxLen));
  return input.length <= cap ? input : sliceUtf16Safe(input, 0, cap);
}
export function resolveUserPath(input: string): string {
const trimmed = input.trim();
if (!trimmed) return trimmed;