fix: avoid invalid UTF-16 in truncation (#567)
This commit is contained in:
@@ -8,6 +8,7 @@ import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
|
||||
import { logInfo } from "../logger.js";
|
||||
import { sliceUtf16Safe } from "../utils.js";
|
||||
import {
|
||||
addSession,
|
||||
appendOutput,
|
||||
@@ -1041,7 +1042,7 @@ function chunkString(input: string, limit = CHUNK_LIMIT) {
|
||||
/**
 * Shortens `str` by keeping its beginning and end and joining them with "...".
 *
 * Strings whose length is already within `max` are returned unchanged.
 * Otherwise each side keeps `floor((max - 3) / 2)` UTF-16 code units (never
 * fewer than zero), cut through `sliceUtf16Safe` so a surrogate pair is not
 * split at either edge.
 *
 * @param str - Text to shorten.
 * @param max - Target maximum length in UTF-16 code units.
 * @returns `str` itself, or `<head>...<tail>`.
 */
function truncateMiddle(str: string, max: number) {
  if (str.length <= max) return str;
  // Clamp at zero: for max < 5 the per-side budget would otherwise be 0 or negative.
  const half = Math.max(0, Math.floor((max - 3) / 2));
  const head = sliceUtf16Safe(str, 0, half);
  // Use an explicit start index rather than `-half`: when half is 0, `-half`
  // is -0, which slice semantics treat as index 0 and would return the whole
  // string as the tail.
  const tail = sliceUtf16Safe(str, str.length - half);
  return `${head}...${tail}`;
}
|
||||
|
||||
function sliceLogLines(
|
||||
|
||||
@@ -9,6 +9,7 @@ import { resolveStateDir } from "../config/paths.js";
|
||||
import { emitAgentEvent } from "../infra/agent-events.js";
|
||||
import { createSubsystemLogger } from "../logging.js";
|
||||
import { splitMediaFromOutput } from "../media/parse.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
|
||||
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
|
||||
import { isMessagingToolDuplicate } from "./pi-embedded-helpers.js";
|
||||
@@ -64,7 +65,7 @@ type MessagingToolSend = {
|
||||
|
||||
/**
 * Caps `text` at `TOOL_RESULT_MAX_CHARS` UTF-16 code units.
 *
 * Text within the limit is returned unchanged. Longer text is cut with
 * `truncateUtf16Safe` (so the cut does not split a surrogate pair) and a
 * "…(truncated)…" marker is appended on a new line.
 *
 * @param text - Text to cap.
 * @returns The original text, or the truncated text followed by the marker.
 */
function truncateToolText(text: string): string {
  if (text.length <= TOOL_RESULT_MAX_CHARS) return text;
  // Only the surrogate-safe truncation is kept; the earlier plain
  // `text.slice(...)` return made this line unreachable.
  return `${truncateUtf16Safe(text, TOOL_RESULT_MAX_CHARS)}\n…(truncated)…`;
}
|
||||
|
||||
function sanitizeToolResult(result: unknown): unknown {
|
||||
|
||||
@@ -49,7 +49,7 @@ import {
|
||||
import { registerAgentRunContext } from "../infra/agent-events.js";
|
||||
import { parseTelegramTarget } from "../telegram/targets.js";
|
||||
import { resolveTelegramToken } from "../telegram/token.js";
|
||||
import { normalizeE164 } from "../utils.js";
|
||||
import { normalizeE164, truncateUtf16Safe } from "../utils.js";
|
||||
import type { CronJob } from "./types.js";
|
||||
|
||||
export type RunCronAgentTurnResult = {
|
||||
@@ -68,7 +68,7 @@ function pickSummaryFromOutput(text: string | undefined) {
|
||||
const clean = (text ?? "").trim();
|
||||
if (!clean) return undefined;
|
||||
const limit = 2000;
|
||||
return clean.length > limit ? `${clean.slice(0, limit)}…` : clean;
|
||||
return clean.length > limit ? `${truncateUtf16Safe(clean, limit)}…` : clean;
|
||||
}
|
||||
|
||||
function pickSummaryFromPayloads(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import crypto from "node:crypto";
|
||||
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { computeNextRunAtMs } from "./schedule.js";
|
||||
import { loadCronStore, saveCronStore } from "./store.js";
|
||||
import type {
|
||||
@@ -61,7 +62,7 @@ function normalizeOptionalText(raw: unknown) {
|
||||
|
||||
/**
 * Shortens `input` to at most `maxLen` UTF-16 code units, ending with "…".
 *
 * Input within the limit is returned unchanged. Otherwise the first
 * `maxLen - 1` code units (never fewer than zero) are kept via
 * `truncateUtf16Safe`, trailing whitespace is trimmed, and an ellipsis is
 * appended.
 *
 * @param input - Text to shorten.
 * @param maxLen - Maximum length in UTF-16 code units.
 * @returns The original text, or the shortened text followed by "…".
 */
function truncateText(input: string, maxLen: number) {
  if (input.length <= maxLen) return input;
  // Reserve one code unit for the ellipsis; clamp so the budget is never negative.
  const budget = Math.max(0, maxLen - 1);
  // Only the surrogate-safe truncation is kept; the earlier plain
  // `input.slice(...)` return made this line unreachable.
  return `${truncateUtf16Safe(input, budget).trimEnd()}…`;
}
|
||||
|
||||
function inferLegacyName(job: {
|
||||
|
||||
@@ -61,6 +61,7 @@ import {
|
||||
} from "../routing/resolve-route.js";
|
||||
import { resolveThreadSessionKeys } from "../routing/session-key.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { loadWebMedia } from "../web/media.js";
|
||||
import { resolveDiscordAccount } from "./accounts.js";
|
||||
import { chunkDiscordText } from "./chunk.js";
|
||||
@@ -1017,7 +1018,10 @@ export function createDiscordMessageHandler(params: {
|
||||
}
|
||||
|
||||
if (shouldLogVerbose()) {
|
||||
const preview = combinedBody.slice(0, 200).replace(/\n/g, "\\n");
|
||||
const preview = truncateUtf16Safe(combinedBody, 200).replace(
|
||||
/\n/g,
|
||||
"\\n",
|
||||
);
|
||||
logVerbose(
|
||||
`discord inbound: channel=${message.channelId} from=${ctxPayload.From} preview="${preview}"`,
|
||||
);
|
||||
|
||||
@@ -24,6 +24,7 @@ import {
|
||||
} from "../pairing/pairing-store.js";
|
||||
import { resolveAgentRoute } from "../routing/resolve-route.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { resolveIMessageAccount } from "./accounts.js";
|
||||
import { createIMessageRpcClient } from "./client.js";
|
||||
import { sendMessageIMessage } from "./send.js";
|
||||
@@ -413,7 +414,7 @@ export async function monitorIMessageProvider(
|
||||
}
|
||||
|
||||
if (shouldLogVerbose()) {
|
||||
const preview = body.slice(0, 200).replace(/\n/g, "\\n");
|
||||
const preview = truncateUtf16Safe(body, 200).replace(/\n/g, "\\n");
|
||||
logVerbose(
|
||||
`imessage inbound: chatId=${chatId ?? "unknown"} from=${ctxPayload.From} len=${body.length} preview="${preview}"`,
|
||||
);
|
||||
|
||||
55
src/utils.ts
55
src/utils.ts
@@ -95,6 +95,61 @@ export function sleep(ms: number) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * Reports whether a UTF-16 code unit lies in the high (leading) surrogate
 * range 0xD800–0xDBFF inclusive.
 */
function isHighSurrogate(codeUnit: number): boolean {
  if (codeUnit < 0xd800) return false;
  return codeUnit <= 0xdbff;
}
|
||||
|
||||
/**
 * Reports whether a UTF-16 code unit lies in the low (trailing) surrogate
 * range 0xDC00–0xDFFF inclusive.
 */
function isLowSurrogate(codeUnit: number): boolean {
  if (codeUnit < 0xdc00) return false;
  return codeUnit <= 0xdfff;
}
|
||||
|
||||
/**
 * Extracts a substring without cutting a surrogate pair in half.
 *
 * `start` and `end` are resolved like slice indices: negative values count
 * back from the end of `input`, and both are clamped to `[0, input.length]`.
 * If the resolved end precedes the resolved start, the two are exchanged.
 * A boundary that falls between a high surrogate and its low surrogate is
 * moved inward, so the pair is dropped from the result instead of being split.
 *
 * @param input - String to slice.
 * @param start - Start index in UTF-16 code units; negative counts from the end.
 * @param end - Optional end index (exclusive); defaults to `input.length`.
 * @returns The selected substring.
 */
export function sliceUtf16Safe(
  input: string,
  start: number,
  end?: number,
): string {
  const len = input.length;

  // Resolve a possibly negative index into the range [0, len].
  const resolveIndex = (index: number): number =>
    index < 0 ? Math.max(len + index, 0) : Math.min(index, len);

  let lo = resolveIndex(start);
  let hi = end === undefined ? len : resolveIndex(end);

  if (hi < lo) {
    [lo, hi] = [hi, lo];
  }

  // Start boundary sits on the low half of a pair: skip past it.
  if (
    lo > 0 &&
    lo < len &&
    isLowSurrogate(input.charCodeAt(lo)) &&
    isHighSurrogate(input.charCodeAt(lo - 1))
  ) {
    lo += 1;
  }

  // End boundary would leave a dangling high half: stop before it.
  if (
    hi > 0 &&
    hi < len &&
    isHighSurrogate(input.charCodeAt(hi - 1)) &&
    isLowSurrogate(input.charCodeAt(hi))
  ) {
    hi -= 1;
  }

  return input.slice(lo, hi);
}
|
||||
|
||||
/**
 * Returns at most `maxLen` leading UTF-16 code units of `input`, without
 * ending on a split surrogate pair.
 *
 * `maxLen` is floored and clamped to be non-negative. Input that already fits
 * is returned unchanged; otherwise the cut is delegated to `sliceUtf16Safe`.
 *
 * @param input - String to truncate.
 * @param maxLen - Maximum length in UTF-16 code units.
 * @returns The original string or its surrogate-safe prefix.
 */
export function truncateUtf16Safe(input: string, maxLen: number): string {
  const cap = Math.max(0, Math.floor(maxLen));
  return input.length <= cap ? input : sliceUtf16Safe(input, 0, cap);
}
|
||||
|
||||
export function resolveUserPath(input: string): string {
|
||||
const trimmed = input.trim();
|
||||
if (!trimmed) return trimmed;
|
||||
|
||||
Reference in New Issue
Block a user