From 2045395ccb67c7bb1ef35215304e11d570cddb92 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 10 Jan 2026 04:14:22 +0000 Subject: [PATCH] test(live): add optional write/bash probes --- .../gateway-models.profiles.live.test.ts | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index b0736be00..e2e1fdd6a 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -23,6 +23,7 @@ const GATEWAY_LIVE = process.env.CLAWDBOT_LIVE_GATEWAY === "1"; const ALL_MODELS = process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" || process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all"; +const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1"; const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip; @@ -304,6 +305,62 @@ describeLive("gateway live (dev agent, profile keys)", () => { throw new Error(`tool probe missing nonce: ${toolText}`); } + if (EXTRA_TOOL_PROBES) { + const nonceC = `nonceC=${randomUUID()}`; + const nonceD = `nonceD=${randomUUID()}`; + const toolWritePath = path.join( + tempDir, + `write-${runIdTool}.txt`, + ); + + const writeProbe = await client.request( + "agent", + { + sessionKey, + idempotencyKey: `idem-${runIdTool}-write`, + message: + `Call the tool named \`write\` (or \`Write\` if \`write\` is unavailable) to write exactly "${nonceC}" to "${toolWritePath}". ` + + `Then call the tool named \`read\` (or \`Read\`) on "${toolWritePath}". ` + + `Finally reply with exactly: ${nonceC}.`, + deliver: false, + }, + { expectFinal: true }, + ); + if (writeProbe?.status !== "ok") { + throw new Error( + `write probe failed: status=${String(writeProbe?.status)}`, + ); + } + const writeText = extractPayloadText(writeProbe?.result); + if (!writeText.includes(nonceC)) { + throw new Error(`write probe missing nonce: ${writeText}`); + } + + const bashProbe = await client.request( + "agent", + { + sessionKey, + idempotencyKey: `idem-${runIdTool}-bash`, + message: + `Call the tool named \`bash\` (or \`Bash\` if \`bash\` is unavailable) and run: echo ${nonceD}. ` + + `Then reply with exactly: ${nonceD}.`, + deliver: false, + }, + { expectFinal: true }, + ); + if (bashProbe?.status !== "ok") { + throw new Error( + `bash probe failed: status=${String(bashProbe?.status)}`, + ); + } + const bashText = extractPayloadText(bashProbe?.result); + if (!bashText.includes(nonceD)) { + throw new Error(`bash probe missing nonce: ${bashText}`); + } + + await fs.rm(toolWritePath, { force: true }); + } + // Regression: tool-call-only turn followed by a user message (OpenAI responses bug class). if ( (model.provider === "openai" &&