fix: restore minimax tool calling

2026-01-10 18:12:32 +00:00
parent c264e98c62
commit 651a9e9be4
8 changed files with 244 additions and 41 deletions
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -444,7 +444,7 @@ describe("sanitizeSessionMessagesImages", () => {
      { role: "user", content: "hello" },
      {
        role: "toolResult",
-        toolUseId: "tool-1",
+        toolCallId: "tool-1",
        content: [{ type: "text", text: "result" }],
      },
    ] satisfies AgentMessage[];
@@ -455,6 +455,88 @@ describe("sanitizeSessionMessagesImages", () => {
    expect(out[0]?.role).toBe("user");
    expect(out[1]?.role).toBe("toolResult");
  });
+
+  it("keeps tool call + tool result IDs unchanged by default", async () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "toolCall",
+            id: "call_123|fc_456",
+            name: "read",
+            arguments: { path: "package.json" },
+          },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_123|fc_456",
+        toolName: "read",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
+      },
+    ] satisfies AgentMessage[];
+
+    const out = await sanitizeSessionMessagesImages(input, "test");
+
+    const assistant = out[0] as unknown as { role?: string; content?: unknown };
+    expect(assistant.role).toBe("assistant");
+    expect(Array.isArray(assistant.content)).toBe(true);
+    const toolCall = (
+      assistant.content as Array<{ type?: string; id?: string }>
+    ).find((b) => b.type === "toolCall");
+    expect(toolCall?.id).toBe("call_123|fc_456");
+
+    const toolResult = out[1] as unknown as {
+      role?: string;
+      toolCallId?: string;
+    };
+    expect(toolResult.role).toBe("toolResult");
+    expect(toolResult.toolCallId).toBe("call_123|fc_456");
+  });
+
+  it("sanitizes tool call + tool result IDs when enabled", async () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "toolCall",
+            id: "call_123|fc_456",
+            name: "read",
+            arguments: { path: "package.json" },
+          },
+        ],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_123|fc_456",
+        toolName: "read",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
+      },
+    ] satisfies AgentMessage[];
+
+    const out = await sanitizeSessionMessagesImages(input, "test", {
+      sanitizeToolCallIds: true,
+    });
+
+    const assistant = out[0] as unknown as { role?: string; content?: unknown };
+    expect(assistant.role).toBe("assistant");
+    expect(Array.isArray(assistant.content)).toBe(true);
+    const toolCall = (
+      assistant.content as Array<{ type?: string; id?: string }>
+    ).find((b) => b.type === "toolCall");
+    expect(toolCall?.id).toBe("call_123_fc_456");
+
+    const toolResult = out[1] as unknown as {
+      role?: string;
+      toolCallId?: string;
+    };
+    expect(toolResult.role).toBe("toolResult");
+    expect(toolResult.toolCallId).toBe("call_123_fc_456");
+  });
 });

 describe("normalizeTextForComparison", () => {
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -85,6 +85,7 @@ function isEmptyAssistantErrorMessage(
 export async function sanitizeSessionMessagesImages(
  messages: AgentMessage[],
  label: string,
+  options?: { sanitizeToolCallIds?: boolean },
 ): Promise<AgentMessage[]> {
  // We sanitize historical session messages because Anthropic can reject a request
  // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
@@ -103,12 +104,13 @@ export async function sanitizeSessionMessagesImages(
        content as ContentBlock[],
        label,
      )) as unknown as typeof toolMsg.content;
-      const sanitizedToolCallId = toolMsg.toolCallId
-        ? sanitizeToolCallId(toolMsg.toolCallId)
-        : undefined;
+      const sanitizedToolCallId =
+        options?.sanitizeToolCallIds && toolMsg.toolCallId
+          ? sanitizeToolCallId(toolMsg.toolCallId)
+          : undefined;
      const toolUseId = (toolMsg as { toolUseId?: unknown }).toolUseId;
      const sanitizedToolUseId =
-        typeof toolUseId === "string" && toolUseId
+        options?.sanitizeToolCallIds && typeof toolUseId === "string" && toolUseId
          ? sanitizeToolCallId(toolUseId)
          : undefined;
      const sanitizedMsg = {
@@ -151,30 +153,31 @@ export async function sanitizeSessionMessagesImages(
          if (rec.type !== "text" || typeof rec.text !== "string") return true;
          return rec.text.trim().length > 0;
        });
-        // Also sanitize tool call IDs in assistant messages (function call blocks)
-        const sanitizedContent = await Promise.all(
-          filteredContent.map(async (block) => {
-            if (!block || typeof block !== "object") return block;
+        const sanitizedContent = options?.sanitizeToolCallIds
+          ? await Promise.all(
+              filteredContent.map(async (block) => {
+                if (!block || typeof block !== "object") return block;

-            const type = (block as { type?: unknown }).type;
-            const id = (block as { id?: unknown }).id;
-            if (typeof id !== "string" || !id) return block;
+                const type = (block as { type?: unknown }).type;
+                const id = (block as { id?: unknown }).id;
+                if (typeof id !== "string" || !id) return block;

-            // Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$.
-            if (
-              type === "functionCall" ||
-              type === "toolUse" ||
-              type === "toolCall"
-            ) {
-              return {
-                ...(block as unknown as Record<string, unknown>),
-                id: sanitizeToolCallId(id),
-              };
-            }
+                // Cloud Code Assist tool blocks require ids matching ^[a-zA-Z0-9_-]+$.
+                if (
+                  type === "functionCall" ||
+                  type === "toolUse" ||
+                  type === "toolCall"
+                ) {
+                  return {
+                    ...(block as unknown as Record<string, unknown>),
+                    id: sanitizeToolCallId(id),
+                  };
+                }

-            return block;
-          }),
-        );
+                return block;
+              }),
+            )
+          : filteredContent;
        const finalContent = (await sanitizeContentBlocksImages(
          sanitizedContent as unknown as ContentBlock[],
          label,
--- a/src/agents/pi-embedded-runner.test.ts
+++ b/src/agents/pi-embedded-runner.test.ts
@@ -1,3 +1,6 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
 import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import { Type } from "@sinclair/typebox";
@@ -8,6 +11,7 @@ import {
  applyGoogleTurnOrderingFix,
  buildEmbeddedSandboxInfo,
  createSystemPromptOverride,
+  runEmbeddedPiAgent,
  splitSdkTools,
 } from "./pi-embedded-runner.js";
 import type { SandboxContext } from "./sandbox.js";
@@ -229,3 +233,57 @@ describe("applyGoogleTurnOrderingFix", () => {
    expect(warn).not.toHaveBeenCalled();
  });
 });
+
+describe("runEmbeddedPiAgent", () => {
+  it("writes models.json into the provided agentDir", async () => {
+    const agentDir = await fs.mkdtemp(
+      path.join(os.tmpdir(), "clawdbot-agent-"),
+    );
+    const workspaceDir = await fs.mkdtemp(
+      path.join(os.tmpdir(), "clawdbot-workspace-"),
+    );
+    const sessionFile = path.join(workspaceDir, "session.jsonl");
+
+    const cfg = {
+      models: {
+        providers: {
+          minimax: {
+            baseUrl: "https://api.minimax.io/v1",
+            api: "openai-completions",
+            apiKey: "sk-minimax-test",
+            models: [
+              {
+                id: "minimax-m2.1",
+                name: "MiniMax M2.1",
+                reasoning: false,
+                input: ["text"],
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+                contextWindow: 200000,
+                maxTokens: 8192,
+              },
+            ],
+          },
+        },
+      },
+    } satisfies ClawdbotConfig;
+
+    await expect(
+      runEmbeddedPiAgent({
+        sessionId: "session:test",
+        sessionKey: "agent:dev:test",
+        sessionFile,
+        workspaceDir,
+        config: cfg,
+        prompt: "hi",
+        provider: "definitely-not-a-provider",
+        model: "definitely-not-a-model",
+        timeoutMs: 1,
+        agentDir,
+      }),
+    ).rejects.toThrow(/Unknown model:/);
+
+    await expect(
+      fs.stat(path.join(agentDir, "models.json")),
+    ).resolves.toBeTruthy();
+  });
+});
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -374,6 +374,7 @@ async function sanitizeSessionHistory(params: {
  const sanitizedImages = await sanitizeSessionMessagesImages(
    params.messages,
    "session:history",
+    { sanitizeToolCallIds: isGoogleModelApi(params.modelApi) },
  );
  return applyGoogleTurnOrderingFix({
    messages: sanitizedImages,
@@ -762,8 +763,8 @@ export async function compactEmbeddedPiSession(params: {
      const provider =
        (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
      const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
-      await ensureClawdbotModelsJson(params.config);
      const agentDir = params.agentDir ?? resolveClawdbotAgentDir();
+      await ensureClawdbotModelsJson(params.config, agentDir);
      const { model, error, authStorage, modelRegistry } = resolveModel(
        provider,
        modelId,
@@ -1065,8 +1066,8 @@ export async function runEmbeddedPiAgent(params: {
      const provider =
        (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
      const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
-      await ensureClawdbotModelsJson(params.config);
      const agentDir = params.agentDir ?? resolveClawdbotAgentDir();
+      await ensureClawdbotModelsJson(params.config, agentDir);
      const { model, error, authStorage, modelRegistry } = resolveModel(
        provider,
        modelId,
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -496,6 +496,21 @@ export function subscribeEmbeddedPiSession(params: {
          (evt as AgentEvent & { toolCallId: string }).toolCallId,
        );
        const args = (evt as AgentEvent & { args: unknown }).args;
+        if (toolName === "read") {
+          const record =
+            args && typeof args === "object"
+              ? (args as Record<string, unknown>)
+              : {};
+          const filePath =
+            typeof record.path === "string" ? record.path.trim() : "";
+          if (!filePath) {
+            const argsPreview =
+              typeof args === "string" ? args.slice(0, 200) : undefined;
+            log.warn(
+              `read tool called without path: toolCallId=${toolCallId} argsType=${typeof args}${argsPreview ? ` argsPreview=${argsPreview}` : ""}`,
+            );
+          }
+        }
        const meta = inferToolMetaFromArgs(toolName, args);
        toolMetaById.set(toolCallId, meta);
        log.debug(
--- a/src/config/io.ts
+++ b/src/config/io.ts
@@ -22,11 +22,7 @@ import {
  applyTalkApiKey,
 } from "./defaults.js";
 import { findLegacyConfigIssues } from "./legacy.js";
-import {
-  CONFIG_PATH_CLAWDBOT,
-  resolveConfigPath,
-  resolveStateDir,
-} from "./paths.js";
+import { resolveConfigPath, resolveStateDir } from "./paths.js";
 import { applyConfigOverrides } from "./runtime-overrides.js";
 import type {
  ClawdbotConfig,
@@ -348,8 +344,21 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
  };
 }

-const defaultIO = createConfigIO({ configPath: CONFIG_PATH_CLAWDBOT });
+// NOTE: These wrappers intentionally do *not* cache the resolved config path at
+// module scope. `CLAWDBOT_CONFIG_PATH` (and friends) are expected to work even
+// when set after the module has been imported (tests, one-off scripts, etc.).
+export function loadConfig(): ClawdbotConfig {
+  return createConfigIO({ configPath: resolveConfigPath() }).loadConfig();
+}

-export const loadConfig = defaultIO.loadConfig;
-export const readConfigFileSnapshot = defaultIO.readConfigFileSnapshot;
-export const writeConfigFile = defaultIO.writeConfigFile;
+export async function readConfigFileSnapshot(): Promise<ConfigFileSnapshot> {
+  return await createConfigIO({
+    configPath: resolveConfigPath(),
+  }).readConfigFileSnapshot();
+}
+
+export async function writeConfigFile(cfg: ClawdbotConfig): Promise<void> {
+  await createConfigIO({ configPath: resolveConfigPath() }).writeConfigFile(
+    cfg,
+  );
+}
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -206,17 +206,41 @@ describeLive("gateway live (dev agent, profile keys)", () => {
      expect(candidates.length).toBeGreaterThan(0);

      // Build a temp config that allows all selected models, so session overrides stick.
+      const lmstudioProvider = cfg.models?.providers?.lmstudio;
      const nextCfg = {
        ...cfg,
        agents: {
          ...cfg.agents,
+          list: (cfg.agents?.list ?? []).map((entry) => ({
+            ...entry,
+            sandbox: { mode: "off" },
+          })),
          defaults: {
            ...cfg.agents?.defaults,
+            // Live tests should avoid Docker sandboxing so tool probes can
+            // operate on the temporary probe files we create in the host workspace.
+            sandbox: { mode: "off" },
            models: Object.fromEntries(
              candidates.map((m) => [`${m.provider}/${m.id}`, {}]),
            ),
          },
        },
+        models: {
+          ...cfg.models,
+          providers: {
+            ...cfg.models?.providers,
+            // LM Studio is most reliable via Chat Completions; its Responses API
+            // tool-calling behavior is inconsistent across releases.
+            ...(lmstudioProvider
+              ? {
+                  lmstudio: {
+                    ...lmstudioProvider,
+                    api: "openai-completions",
+                  },
+                }
+              : {}),
+          },
+        },
      };
      const tempDir = await fs.mkdtemp(
        path.join(os.tmpdir(), "clawdbot-live-"),
@@ -275,8 +299,8 @@ describeLive("gateway live (dev agent, profile keys)", () => {
            const text = extractPayloadText(payload?.result);
            if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
            if (
-              !/\\bmicrotask\\b/i.test(text) ||
-              !/\\bmacrotask\\b/i.test(text)
+              !/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
+              !/\bmacro\s*-?\s*tasks?\b/i.test(text)
            ) {
              throw new Error(`missing required keywords: ${text}`);
            }
@@ -289,7 +313,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
                sessionKey,
                idempotencyKey: `idem-${runIdTool}-tool`,
                message:
-                  `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) on "${toolProbePath}". ` +
+                  `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
                  `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
                deliver: false,
              },
@@ -403,7 +427,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
                );
              }
              const version = extractPayloadText(second?.result);
-              if (!/^\\d{4}\\.\\d+\\.\\d+/.test(version.trim())) {
+              if (!/^\d{4}\.\d+\.\d+/.test(version.trim())) {
                throw new Error(`unexpected version: ${version}`);
              }
            }
--- a/test/test-env.ts
+++ b/test/test-env.ts
@@ -12,6 +12,17 @@ function restoreEnv(entries: RestoreEntry[]): void {
 }

 export function installTestEnv(): { cleanup: () => void; tempHome: string } {
+  const live =
+    process.env.LIVE === "1" ||
+    process.env.CLAWDBOT_LIVE_TEST === "1" ||
+    process.env.CLAWDBOT_LIVE_GATEWAY === "1";
+
+  // Live tests must use the real user environment (keys, profiles, config).
+  // The default test env isolates HOME to avoid touching real state.
+  if (live) {
+    return { cleanup: () => {}, tempHome: process.env.HOME ?? "" };
+  }
+
  const restore: RestoreEntry[] = [
    { key: "HOME", value: process.env.HOME },
    { key: "USERPROFILE", value: process.env.USERPROFILE },