diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index ec7b0a662..4272b2a35 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -444,7 +444,7 @@ describe("sanitizeSessionMessagesImages", () => { { role: "user", content: "hello" }, { role: "toolResult", - toolUseId: "tool-1", + toolCallId: "tool-1", content: [{ type: "text", text: "result" }], }, ] satisfies AgentMessage[]; @@ -455,6 +455,88 @@ describe("sanitizeSessionMessagesImages", () => { expect(out[0]?.role).toBe("user"); expect(out[1]?.role).toBe("toolResult"); }); + + it("keeps tool call + tool result IDs unchanged by default", async () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_123|fc_456", + name: "read", + arguments: { path: "package.json" }, + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_456", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test"); + + const assistant = out[0] as unknown as { role?: string; content?: unknown }; + expect(assistant.role).toBe("assistant"); + expect(Array.isArray(assistant.content)).toBe(true); + const toolCall = ( + assistant.content as Array<{ type?: string; id?: string }> + ).find((b) => b.type === "toolCall"); + expect(toolCall?.id).toBe("call_123|fc_456"); + + const toolResult = out[1] as unknown as { + role?: string; + toolCallId?: string; + }; + expect(toolResult.role).toBe("toolResult"); + expect(toolResult.toolCallId).toBe("call_123|fc_456"); + }); + + it("sanitizes tool call + tool result IDs when enabled", async () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_123|fc_456", + name: "read", + arguments: { path: "package.json" }, + }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_456", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test", { + sanitizeToolCallIds: true, + }); + + const assistant = out[0] as unknown as { role?: string; content?: unknown }; + expect(assistant.role).toBe("assistant"); + expect(Array.isArray(assistant.content)).toBe(true); + const toolCall = ( + assistant.content as Array<{ type?: string; id?: string }> + ).find((b) => b.type === "toolCall"); + expect(toolCall?.id).toBe("call_123_fc_456"); + + const toolResult = out[1] as unknown as { + role?: string; + toolCallId?: string; + }; + expect(toolResult.role).toBe("toolResult"); + expect(toolResult.toolCallId).toBe("call_123_fc_456"); + }); }); describe("normalizeTextForComparison", () => { diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 61f0b5798..908940848 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -85,6 +85,7 @@ function isEmptyAssistantErrorMessage( export async function sanitizeSessionMessagesImages( messages: AgentMessage[], label: string, + options?: { sanitizeToolCallIds?: boolean }, ): Promise { // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). @@ -103,12 +104,13 @@ export async function sanitizeSessionMessagesImages( content as ContentBlock[], label, )) as unknown as typeof toolMsg.content; - const sanitizedToolCallId = toolMsg.toolCallId - ? sanitizeToolCallId(toolMsg.toolCallId) - : undefined; + const sanitizedToolCallId = + options?.sanitizeToolCallIds && toolMsg.toolCallId + ? sanitizeToolCallId(toolMsg.toolCallId) + : undefined; const toolUseId = (toolMsg as { toolUseId?: unknown }).toolUseId; const sanitizedToolUseId = - typeof toolUseId === "string" && toolUseId + options?.sanitizeToolCallIds && typeof toolUseId === "string" && toolUseId ? sanitizeToolCallId(toolUseId) : undefined; const sanitizedMsg = { @@ -151,30 +153,31 @@ export async function sanitizeSessionMessagesImages( if (rec.type !== "text" || typeof rec.text !== "string") return true; return rec.text.trim().length > 0; }); - // Also sanitize tool call IDs in assistant messages (function call blocks) - const sanitizedContent = await Promise.all( - filteredContent.map(async (block) => { - if (!block || typeof block !== "object") return block; + const sanitizedContent = options?.sanitizeToolCallIds + ? await Promise.all( + filteredContent.map(async (block) => { + if (!block || typeof block !== "object") return block; - const type = (block as { type?: unknown }).type; - const id = (block as { id?: unknown }).id; - if (typeof id !== "string" || !id) return block; + const type = (block as { type?: unknown }).type; + const id = (block as { id?: unknown }).id; + if (typeof id !== "string" || !id) return block; - // Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$. - if ( - type === "functionCall" || - type === "toolUse" || - type === "toolCall" - ) { - return { - ...(block as unknown as Record), - id: sanitizeToolCallId(id), - }; - } + // Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$. + if ( + type === "functionCall" || + type === "toolUse" || + type === "toolCall" + ) { + return { + ...(block as unknown as Record), + id: sanitizeToolCallId(id), + }; + } - return block; - }), - ); + return block; + }), + ) + : filteredContent; const finalContent = (await sanitizeContentBlocksImages( sanitizedContent as unknown as ContentBlock[], label, diff --git a/src/agents/pi-embedded-runner.test.ts b/src/agents/pi-embedded-runner.test.ts index 1704cd1ae..ea2cdd6bf 100644 --- a/src/agents/pi-embedded-runner.test.ts +++ b/src/agents/pi-embedded-runner.test.ts @@ -1,3 +1,6 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core"; import { SessionManager } from "@mariozechner/pi-coding-agent"; import { Type } from "@sinclair/typebox"; @@ -8,6 +11,7 @@ import { applyGoogleTurnOrderingFix, buildEmbeddedSandboxInfo, createSystemPromptOverride, + runEmbeddedPiAgent, splitSdkTools, } from "./pi-embedded-runner.js"; import type { SandboxContext } from "./sandbox.js"; @@ -229,3 +233,57 @@ describe("applyGoogleTurnOrderingFix", () => { expect(warn).not.toHaveBeenCalled(); }); }); + +describe("runEmbeddedPiAgent", () => { + it("writes models.json into the provided agentDir", async () => { + const agentDir = await fs.mkdtemp( + path.join(os.tmpdir(), "clawdbot-agent-"), + ); + const workspaceDir = await fs.mkdtemp( + path.join(os.tmpdir(), "clawdbot-workspace-"), + ); + const sessionFile = path.join(workspaceDir, "session.jsonl"); + + const cfg = { + models: { + providers: { + minimax: { + baseUrl: "https://api.minimax.io/v1", + api: "openai-completions", + apiKey: "sk-minimax-test", + models: [ + { + id: "minimax-m2.1", + name: "MiniMax M2.1", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + }, + ], + }, + }, + }, + } satisfies ClawdbotConfig; + + await expect( + runEmbeddedPiAgent({ + sessionId: "session:test", + sessionKey: "agent:dev:test", + sessionFile, + workspaceDir, + config: cfg, + prompt: "hi", + provider: "definitely-not-a-provider", + model: "definitely-not-a-model", + timeoutMs: 1, + agentDir, + }), + ).rejects.toThrow(/Unknown model:/); + + await expect( + fs.stat(path.join(agentDir, "models.json")), + ).resolves.toBeTruthy(); + }); +}); diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 90afd6d0d..48cd2cf06 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -374,6 +374,7 @@ async function sanitizeSessionHistory(params: { const sanitizedImages = await sanitizeSessionMessagesImages( params.messages, "session:history", + { sanitizeToolCallIds: isGoogleModelApi(params.modelApi) }, ); return applyGoogleTurnOrderingFix({ messages: sanitizedImages, @@ -762,8 +763,8 @@ export async function compactEmbeddedPiSession(params: { const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; - await ensureClawdbotModelsJson(params.config); const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); + await ensureClawdbotModelsJson(params.config, agentDir); const { model, error, authStorage, modelRegistry } = resolveModel( provider, modelId, @@ -1065,8 +1066,8 @@ export async function runEmbeddedPiAgent(params: { const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; - await ensureClawdbotModelsJson(params.config); const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); + await ensureClawdbotModelsJson(params.config, agentDir); const { model, error, authStorage, modelRegistry } = resolveModel( provider, modelId, diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index e3b59d12b..1c92d65e8 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -496,6 +496,21 @@ export function subscribeEmbeddedPiSession(params: { (evt as AgentEvent & { toolCallId: string }).toolCallId, ); const args = (evt as AgentEvent & { args: unknown }).args; + if (toolName === "read") { + const record = + args && typeof args === "object" + ? (args as Record) + : {}; + const filePath = + typeof record.path === "string" ? record.path.trim() : ""; + if (!filePath) { + const argsPreview = + typeof args === "string" ? args.slice(0, 200) : undefined; + log.warn( + `read tool called without path: toolCallId=${toolCallId} argsType=${typeof args}${argsPreview ? ` argsPreview=${argsPreview}` : ""}`, + ); + } + } const meta = inferToolMetaFromArgs(toolName, args); toolMetaById.set(toolCallId, meta); log.debug( diff --git a/src/config/io.ts b/src/config/io.ts index 0f514173a..05fa98c2d 100644 --- a/src/config/io.ts +++ b/src/config/io.ts @@ -22,11 +22,7 @@ import { applyTalkApiKey, } from "./defaults.js"; import { findLegacyConfigIssues } from "./legacy.js"; -import { - CONFIG_PATH_CLAWDBOT, - resolveConfigPath, - resolveStateDir, -} from "./paths.js"; +import { resolveConfigPath, resolveStateDir } from "./paths.js"; import { applyConfigOverrides } from "./runtime-overrides.js"; import type { ClawdbotConfig, @@ -348,8 +344,21 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) { }; } -const defaultIO = createConfigIO({ configPath: CONFIG_PATH_CLAWDBOT }); +// NOTE: These wrappers intentionally do *not* cache the resolved config path at +// module scope. `CLAWDBOT_CONFIG_PATH` (and friends) are expected to work even +// when set after the module has been imported (tests, one-off scripts, etc.). +export function loadConfig(): ClawdbotConfig { + return createConfigIO({ configPath: resolveConfigPath() }).loadConfig(); +} -export const loadConfig = defaultIO.loadConfig; -export const readConfigFileSnapshot = defaultIO.readConfigFileSnapshot; -export const writeConfigFile = defaultIO.writeConfigFile; +export async function readConfigFileSnapshot(): Promise { + return await createConfigIO({ + configPath: resolveConfigPath(), + }).readConfigFileSnapshot(); +} + +export async function writeConfigFile(cfg: ClawdbotConfig): Promise { + await createConfigIO({ configPath: resolveConfigPath() }).writeConfigFile( + cfg, + ); +} diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 9458cc6a0..be65c705f 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -206,17 +206,41 @@ describeLive("gateway live (dev agent, profile keys)", () => { expect(candidates.length).toBeGreaterThan(0); // Build a temp config that allows all selected models, so session overrides stick. + const lmstudioProvider = cfg.models?.providers?.lmstudio; const nextCfg = { ...cfg, agents: { ...cfg.agents, + list: (cfg.agents?.list ?? []).map((entry) => ({ + ...entry, + sandbox: { mode: "off" }, + })), defaults: { ...cfg.agents?.defaults, + // Live tests should avoid Docker sandboxing so tool probes can + // operate on the temporary probe files we create in the host workspace. + sandbox: { mode: "off" }, models: Object.fromEntries( candidates.map((m) => [`${m.provider}/${m.id}`, {}]), ), }, }, + models: { + ...cfg.models, + providers: { + ...cfg.models?.providers, + // LM Studio is most reliable via Chat Completions; its Responses API + // tool-calling behavior is inconsistent across releases. + ...(lmstudioProvider + ? { + lmstudio: { + ...lmstudioProvider, + api: "openai-completions", + }, + } + : {}), + }, + }, }; const tempDir = await fs.mkdtemp( path.join(os.tmpdir(), "clawdbot-live-"), @@ -275,8 +299,8 @@ describeLive("gateway live (dev agent, profile keys)", () => { const text = extractPayloadText(payload?.result); if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`); if ( - !/\\bmicrotask\\b/i.test(text) || - !/\\bmacrotask\\b/i.test(text) + !/\bmicro\s*-?\s*tasks?\b/i.test(text) || + !/\bmacro\s*-?\s*tasks?\b/i.test(text) ) { throw new Error(`missing required keywords: ${text}`); } @@ -289,7 +313,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { sessionKey, idempotencyKey: `idem-${runIdTool}-tool`, message: - `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) on "${toolProbePath}". ` + + `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` + `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`, deliver: false, }, @@ -403,7 +427,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { ); } const version = extractPayloadText(second?.result); - if (!/^\\d{4}\\.\\d+\\.\\d+/.test(version.trim())) { + if (!/^\d{4}\.\d+\.\d+/.test(version.trim())) { throw new Error(`unexpected version: ${version}`); } } diff --git a/test/test-env.ts b/test/test-env.ts index 3f5add567..1b931ac63 100644 --- a/test/test-env.ts +++ b/test/test-env.ts @@ -12,6 +12,17 @@ function restoreEnv(entries: RestoreEntry[]): void { } export function installTestEnv(): { cleanup: () => void; tempHome: string } { + const live = + process.env.LIVE === "1" || + process.env.CLAWDBOT_LIVE_TEST === "1" || + process.env.CLAWDBOT_LIVE_GATEWAY === "1"; + + // Live tests must use the real user environment (keys, profiles, config). + // The default test env isolates HOME to avoid touching real state. + if (live) { + return { cleanup: () => {}, tempHome: process.env.HOME ?? "" }; + } + const restore: RestoreEntry[] = [ { key: "HOME", value: process.env.HOME }, { key: "USERPROFILE", value: process.env.USERPROFILE },