diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8de6b8fbc..07a5b175f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@
 - Gateway/Control UI: make `chat.send` non-blocking, wire Stop to `chat.abort`, and treat `/stop` as an out-of-band abort. (#653)
 - Gateway/Control UI: allow `chat.abort` without `runId` (abort active runs), suppress post-abort chat streaming, and prune stuck chat runs. (#653)
 - Gateway/Control UI: sniff image attachments for chat.send, drop non-images, and log mismatches. (#670) — thanks @cristip73.
+- Gateway/Agent: accept image attachments on `agent` (multimodal message) and add live gateway image probe (`CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE=1`).
 - CLI: `clawdbot sessions` now includes `elev:*` + `usage:*` flags in the table output.
 - CLI/Pairing: accept positional provider for `pairing list|approve` (npm-run compatible); update docs/bot hints.
 - Branding: normalize user-facing “ClawdBot”/“CLAWDBOT” → “Clawdbot” (CLI, status, docs).
diff --git a/docs/testing.md b/docs/testing.md
index 6a2c7f528..51df16c54 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -129,6 +129,8 @@ Live tests are split into two layers so we can isolate failures:
 - Optional tool-calling stress:
   - `CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE=1` enables an extra “bash writes file → read reads it back → echo nonce” check.
   - This is specifically meant to catch tool-calling compatibility issues across providers (formatting, history replay, tool_result pairing, etc.).
+- Optional image send smoke:
+  - `CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE=1` sends a real image attachment through the gateway agent pipeline (multimodal message) and asserts the model can read back a per-run code from the image.
 
 ### Recommended live recipes
 
@@ -143,6 +145,37 @@ Narrow, explicit allowlists are fastest and least flaky:
 - Tool calling across several providers (bash + read probe):
   - `LIVE=1 CLAWDBOT_LIVE_GATEWAY=1 CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1 CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE=1 CLAWDBOT_LIVE_GATEWAY_MODELS="openai/gpt-5.2,anthropic/claude-opus-4-5,google/gemini-flash-latest,zai/glm-4.7,minimax/minimax-m2.1" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts`
 
+## Live: model matrix (what we cover)
+
+There is no fixed “CI model list” (live is opt-in), but these are the **recommended** models to cover regularly on a dev machine with keys.
+
+### Baseline: tool calling (Read + optional Bash)
+
+Pick at least one per provider family:
+- OpenAI: `openai/gpt-5.2` (or `openai/gpt-5-mini`)
+- Anthropic: `anthropic/claude-opus-4-5` (or `anthropic/claude-sonnet-4-5`)
+- Google: `google/gemini-flash-latest` (or `google/gemini-2.5-pro`)
+- Z.AI (GLM): `zai/glm-4.7`
+- MiniMax: `minimax/minimax-m2.1`
+
+Optional additional coverage (nice to have):
+- xAI: `xai/grok-4` (or latest available)
+- Mistral: `mistral/`… (pick one tool-capable model you have enabled)
+- Cerebras: `cerebras/`… (if you have access)
+- LM Studio: `lmstudio/`… (local; tool calling depends on API mode)
+
+### Vision: image send (attachment → multimodal message)
+
+Run with `CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE=1` and include at least one image-capable model in `CLAWDBOT_LIVE_GATEWAY_MODELS` (Claude/Gemini/OpenAI vision-capable variants, etc.).
+
+### Aggregators / alternate gateways
+
+If you have keys enabled, we also support testing via:
+- OpenRouter: `openrouter/...` (hundreds of models; use `clawdbot models scan` to find tool+image capable candidates)
+- OpenCode Zen: `opencode-zen/...` (requires `OPENCODE_ZEN_API_KEY`)
+
+Tip: don’t try to hardcode “all models” in docs. The authoritative list is whatever `discoverModels(...)` returns on your machine, combined with whichever keys are available.
+
 ## Credentials (never commit)
 
 Live tests discover credentials the same way the CLI does. Practical implications:
diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts
index 1e7229b5e..4c5fb6960 100644
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -1,4 +1,4 @@
-import { randomUUID } from "node:crypto";
+import { randomBytes, randomUUID } from "node:crypto";
 import fs from "node:fs/promises";
 import { createServer } from "node:net";
 import os from "node:os";
@@ -16,6 +16,7 @@ import { ensureClawdbotModelsJson } from "../agents/models-config.js";
 import { loadConfig } from "../config/config.js";
 import { resolveUserPath } from "../utils.js";
 import { GatewayClient } from "./client.js";
+import { renderCatNoncePngBase64 } from "./live-image-probe.js";
 import { startGatewayServer } from "./server.js";
 
 const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
@@ -24,6 +25,8 @@ const ALL_MODELS =
   process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
   process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
 const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
+// Opt-in: CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE=1 additionally runs the image-attachment
+// probe for each selected model whose `input` list includes "image".
+const EXTRA_IMAGE_PROBES =
+  process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
 
 const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
 
@@ -60,6 +63,43 @@ function isMeaningful(text: string): boolean {
   return true;
 }
 
+/**
+ * Generates a random probe code to be rendered into the test image.
+ *
+ * Each character is picked from the 12-symbol alphabet "2345689ABCEF" using
+ * one cryptographically random byte reduced modulo the alphabet size.
+ * NOTE(review): the alphabet presumably omits look-alike glyphs (0/D, 1/7) to
+ * make OCR by the model more reliable — confirm with the author.
+ *
+ * @param len Number of characters to produce (defaults to 10).
+ * @returns A string of exactly `len` alphabet characters.
+ */
+function randomImageProbeCode(len = 10): string {
+  const symbols = "2345689ABCEF";
+  return Array.from(
+    randomBytes(len),
+    (byte) => symbols[byte % symbols.length],
+  ).join("");
+}
+
+/**
+ * Levenshtein distance between two strings, compared per UTF-16 code unit.
+ *
+ * Uses a single DP row that is updated in place; `diagonal` carries the value
+ * of the previous row's left neighbour, which the in-place update would
+ * otherwise overwrite.
+ *
+ * @param a First string.
+ * @param b Second string.
+ * @returns The minimum number of single-unit insertions, deletions and
+ *   substitutions needed to turn `a` into `b`.
+ */
+function editDistance(a: string, b: string): number {
+  if (a === b) return 0;
+  if (a.length === 0) return b.length;
+  if (b.length === 0) return a.length;
+
+  // row[j] = distance between the processed prefix of `a` and b[0..j).
+  const row: number[] = [];
+  for (let j = 0; j <= b.length; j += 1) row.push(j);
+
+  for (let i = 0; i < a.length; i += 1) {
+    let diagonal = row[0];
+    row[0] = i + 1;
+    for (let j = 0; j < b.length; j += 1) {
+      const above = row[j + 1];
+      const substitution = diagonal + (a[i] === b[j] ? 0 : 1);
+      row[j + 1] = Math.min(
+        above + 1, // delete
+        row[j] + 1, // insert
+        substitution, // substitute
+      );
+      diagonal = above;
+    }
+  }
+
+  return row[b.length] ?? Number.POSITIVE_INFINITY;
+}
+
 async function getFreePort(): Promise<number> {
   return await new Promise((resolve, reject) => {
     const srv = createServer();
@@ -204,6 +244,14 @@ describeLive("gateway live (dev agent, profile keys)", () => {
       }
 
       expect(candidates.length).toBeGreaterThan(0);
+      const imageCandidates = EXTRA_IMAGE_PROBES
+        ? candidates.filter((m) => m.input?.includes("image"))
+        : [];
+      if (EXTRA_IMAGE_PROBES && imageCandidates.length === 0) {
+        throw new Error(
+          "image probe enabled but no selected models advertise image support; set CLAWDBOT_LIVE_GATEWAY_MODELS to include an image-capable model",
+        );
+      }
 
       // Build a temp config that allows all selected models, so session overrides stick.
       const lmstudioProvider = cfg.models?.providers?.lmstudio;
@@ -365,6 +413,53 @@ describeLive("gateway live (dev agent, profile keys)", () => {
               await fs.rm(toolWritePath, { force: true });
             }
 
+            if (EXTRA_IMAGE_PROBES && model.input?.includes("image")) {
+              const imageCode = randomImageProbeCode(10);
+              const imageBase64 = renderCatNoncePngBase64(imageCode);
+              const runIdImage = randomUUID();
+
+              const imageProbe = await client.request<AgentFinalPayload>(
+                "agent",
+                {
+                  sessionKey,
+                  idempotencyKey: `idem-${runIdImage}-image`,
+                  message:
+                    "Look at the attached image. Reply with exactly two tokens separated by a single space: " +
+                    "(1) the animal shown or written in the image, lowercase; " +
+                    "(2) the code printed in the image, uppercase. No extra text.",
+                  attachments: [
+                    {
+                      mimeType: "image/png",
+                      fileName: `probe-${runIdImage}.png`,
+                      content: imageBase64,
+                    },
+                  ],
+                  deliver: false,
+                },
+                { expectFinal: true },
+              );
+              if (imageProbe?.status !== "ok") {
+                throw new Error(
+                  `image probe failed: status=${String(imageProbe?.status)}`,
+                );
+              }
+              const imageText = extractPayloadText(imageProbe?.result);
+              if (!/\bcat\b/i.test(imageText)) {
+                throw new Error(`image probe missing 'cat': ${imageText}`);
+              }
+              const candidates =
+                imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
+              const bestDistance = candidates.reduce((best, cand) => {
+                if (Math.abs(cand.length - imageCode.length) > 2) return best;
+                return Math.min(best, editDistance(cand, imageCode));
+              }, Number.POSITIVE_INFINITY);
+              if (!(bestDistance <= 1)) {
+                throw new Error(
+                  `image probe missing code (${imageCode}): ${imageText}`,
+                );
+              }
+            }
+
             // Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
             if (
               (model.provider === "openai" &&
diff --git a/src/gateway/live-image-probe.ts b/src/gateway/live-image-probe.ts
new file mode 100644
index 000000000..490bd4daf
--- /dev/null
+++ b/src/gateway/live-image-probe.ts
@@ -0,0 +1,206 @@
+import { deflateSync } from "node:zlib";
+
+/**
+ * 256-entry lookup table for the reflected CRC-32 polynomial 0xEDB88320.
+ * Entry `n` is the result of running byte value `n` through eight
+ * shift/xor steps.
+ */
+const CRC_TABLE = new Uint32Array(256).map((_zero, byte) => {
+  let remainder = byte;
+  for (let bit = 0; bit < 8; bit += 1) {
+    const shifted = remainder >>> 1;
+    remainder = (remainder & 1) !== 0 ? 0xedb88320 ^ shifted : shifted;
+  }
+  return remainder >>> 0;
+});
+
+/**
+ * Computes the CRC-32 checksum of `buf` using the precomputed `CRC_TABLE`.
+ *
+ * @param buf Bytes to checksum.
+ * @returns The checksum as an unsigned 32-bit number.
+ */
+function crc32(buf: Buffer) {
+  let state = 0xffffffff;
+  for (const byte of buf) {
+    state = CRC_TABLE[(state ^ byte) & 0xff] ^ (state >>> 8);
+  }
+  // Final inversion; `>>> 0` converts the signed xor result back to unsigned.
+  return (state ^ 0xffffffff) >>> 0;
+}
+
+/**
+ * Serializes one PNG chunk: 4-byte big-endian data length, 4-byte ASCII type,
+ * the data itself, then a 4-byte big-endian CRC-32 over type + data.
+ *
+ * @param type Four-character chunk type (e.g. "IHDR").
+ * @param data Chunk payload.
+ * @returns The encoded chunk bytes.
+ */
+function pngChunk(type: string, data: Buffer) {
+  const tag = Buffer.from(type, "ascii");
+
+  const lengthField = Buffer.alloc(4);
+  lengthField.writeUInt32BE(data.length, 0);
+
+  const checksumField = Buffer.alloc(4);
+  checksumField.writeUInt32BE(crc32(Buffer.concat([tag, data])), 0);
+
+  return Buffer.concat([lengthField, tag, data, checksumField]);
+}
+
+/**
+ * Encodes a raw RGBA pixel buffer as a PNG (8-bit, color type 6, no interlace).
+ *
+ * @param buffer Pixel data, 4 bytes per pixel, rows stored top to bottom.
+ * @param width Image width in pixels.
+ * @param height Image height in pixels.
+ * @returns The complete PNG file contents.
+ */
+function encodePngRgba(buffer: Buffer, width: number, height: number) {
+  const rowBytes = width * 4;
+  const scanlineBytes = rowBytes + 1;
+
+  // Each scanline is the row's pixels prefixed with a filter-type byte.
+  const scanlines = Buffer.alloc(scanlineBytes * height);
+  for (let y = 0; y < height; y += 1) {
+    const lineStart = y * scanlineBytes;
+    scanlines[lineStart] = 0; // filter: none
+    buffer.copy(scanlines, lineStart + 1, y * rowBytes, y * rowBytes + rowBytes);
+  }
+  const idatPayload = deflateSync(scanlines);
+
+  const header = Buffer.alloc(13);
+  header.writeUInt32BE(width, 0);
+  header.writeUInt32BE(height, 4);
+  // bit depth 8, color type 6 (RGBA), compression 0, filter 0, interlace 0
+  header.set([8, 6, 0, 0, 0], 8);
+
+  const signature = Buffer.from("89504e470d0a1a0a", "hex");
+
+  return Buffer.concat([
+    signature,
+    pngChunk("IHDR", header),
+    pngChunk("IDAT", idatPayload),
+    pngChunk("IEND", Buffer.alloc(0)),
+  ]);
+}
+
+/**
+ * Writes one RGBA pixel into `buf` at (x, y), silently ignoring coordinates
+ * that fall outside the image.
+ *
+ * There is no height parameter: rows past the bottom are rejected by checking
+ * the computed byte offset against the buffer length.
+ *
+ * @param buf RGBA pixel buffer, 4 bytes per pixel.
+ * @param x Column of the pixel.
+ * @param y Row of the pixel.
+ * @param width Image width in pixels (row stride is `width * 4` bytes).
+ * @param r Red channel.
+ * @param g Green channel.
+ * @param b Blue channel.
+ * @param a Alpha channel (defaults to 255).
+ */
+function fillPixel(
+  buf: Buffer,
+  x: number,
+  y: number,
+  width: number,
+  r: number,
+  g: number,
+  b: number,
+  a = 255,
+) {
+  if (x < 0 || y < 0 || x >= width) return;
+
+  const offset = (y * width + x) * 4;
+  if (offset < 0 || offset + 3 >= buf.length) return;
+
+  const channels = [r, g, b, a];
+  for (let c = 0; c < channels.length; c += 1) {
+    buf[offset + c] = channels[c];
+  }
+}
+
+// 5x7 bitmap font used to render the probe text.
+// Each glyph is 7 row bitmasks listed top to bottom; within a row, bit 4
+// (0b10000) is the leftmost column and bit 0 the rightmost.
+// Only digits 0-9, letters A-F and T are defined; drawGlyph5x7 draws nothing
+// for characters without an entry.
+const GLYPH_ROWS_5X7: Record<string, number[]> = {
+  "0": [0b01110, 0b10001, 0b10011, 0b10101, 0b11001, 0b10001, 0b01110],
+  "1": [0b00100, 0b01100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110],
+  "2": [0b01110, 0b10001, 0b00001, 0b00010, 0b00100, 0b01000, 0b11111],
+  "3": [0b11110, 0b00001, 0b00001, 0b01110, 0b00001, 0b00001, 0b11110],
+  "4": [0b00010, 0b00110, 0b01010, 0b10010, 0b11111, 0b00010, 0b00010],
+  "5": [0b11111, 0b10000, 0b11110, 0b00001, 0b00001, 0b10001, 0b01110],
+  "6": [0b00110, 0b01000, 0b10000, 0b11110, 0b10001, 0b10001, 0b01110],
+  "7": [0b11111, 0b00001, 0b00010, 0b00100, 0b01000, 0b01000, 0b01000],
+  "8": [0b01110, 0b10001, 0b10001, 0b01110, 0b10001, 0b10001, 0b01110],
+  "9": [0b01110, 0b10001, 0b10001, 0b01111, 0b00001, 0b00010, 0b01100],
+
+  // Letters: hex digits A-F plus T (needed for the "CAT" caption).
+  A: [0b01110, 0b10001, 0b10001, 0b11111, 0b10001, 0b10001, 0b10001],
+  B: [0b11110, 0b10001, 0b10001, 0b11110, 0b10001, 0b10001, 0b11110],
+  C: [0b01110, 0b10001, 0b10000, 0b10000, 0b10000, 0b10001, 0b01110],
+  D: [0b11110, 0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b11110],
+  E: [0b11111, 0b10000, 0b10000, 0b11110, 0b10000, 0b10000, 0b11111],
+  F: [0b11111, 0b10000, 0b10000, 0b11110, 0b10000, 0b10000, 0b10000],
+  T: [0b11111, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100],
+};
+
+/**
+ * Draws a single 5x7 glyph into an RGBA buffer, scaling every font pixel to
+ * a `scale` x `scale` square. Characters with no entry in `GLYPH_ROWS_5X7`
+ * are skipped without drawing anything.
+ *
+ * @param params.buf RGBA pixel buffer to draw into.
+ * @param params.width Image width in pixels.
+ * @param params.x Left edge of the glyph.
+ * @param params.y Top edge of the glyph.
+ * @param params.char Character to draw (looked up as-is in the glyph table).
+ * @param params.scale Size in image pixels of one font pixel.
+ * @param params.color Fill color; alpha defaults to 255 when omitted.
+ */
+function drawGlyph5x7(params: {
+  buf: Buffer;
+  width: number;
+  x: number;
+  y: number;
+  char: string;
+  scale: number;
+  color: { r: number; g: number; b: number; a?: number };
+}) {
+  const { buf, width, x, y, char, scale, color } = params;
+  const glyph = GLYPH_ROWS_5X7[char];
+  if (!glyph) return;
+
+  const alpha = color.a ?? 255;
+  for (let row = 0; row < 7; row += 1) {
+    const mask = glyph[row] ?? 0;
+    for (let col = 0; col < 5; col += 1) {
+      // Bit 4 is the leftmost column, so column `col` lives at bit (4 - col).
+      if (((mask >> (4 - col)) & 1) === 0) continue;
+      const left = x + col * scale;
+      const top = y + row * scale;
+      for (let dy = 0; dy < scale; dy += 1) {
+        for (let dx = 0; dx < scale; dx += 1) {
+          fillPixel(
+            buf,
+            left + dx,
+            top + dy,
+            width,
+            color.r,
+            color.g,
+            color.b,
+            alpha,
+          );
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Draws `text` left to right starting at (x, y) using the 5x7 glyph font.
+ *
+ * The text is uppercased once up front, so no per-character case fallback is
+ * needed. Characters without a glyph still advance the cursor, leaving a
+ * blank cell.
+ *
+ * @param params.buf RGBA pixel buffer to draw into.
+ * @param params.width Image width in pixels.
+ * @param params.x Left edge of the first glyph.
+ * @param params.y Top edge of the text line.
+ * @param params.text Text to render (case-insensitive).
+ * @param params.scale Size in image pixels of one font pixel.
+ * @param params.color Fill color; alpha defaults to 255 when omitted.
+ */
+function drawText(params: {
+  buf: Buffer;
+  width: number;
+  x: number;
+  y: number;
+  text: string;
+  scale: number;
+  color: { r: number; g: number; b: number; a?: number };
+}) {
+  // 5 glyph columns + 1 column of spacing, in scaled pixels.
+  const advance = 6 * params.scale;
+  let cursorX = params.x;
+  for (const char of params.text.toUpperCase()) {
+    drawGlyph5x7({
+      buf: params.buf,
+      width: params.width,
+      x: cursorX,
+      y: params.y,
+      char,
+      scale: params.scale,
+      color: params.color,
+    });
+    cursorX += advance;
+  }
+}
+
+/**
+ * Returns the pixel width of `text` when rendered with the 5x7 font at the
+ * given scale: each character occupies 6 scaled columns (5 glyph + 1 spacing),
+ * minus the trailing spacing column after the last character.
+ *
+ * Characters are counted per code point to match how `drawText` advances its
+ * cursor, and empty text measures 0 rather than a negative width.
+ *
+ * @param text Text to measure.
+ * @param scale Size in image pixels of one font pixel.
+ * @returns Width in pixels (0 for empty text).
+ */
+function measureTextWidthPx(text: string, scale: number) {
+  const glyphCount = Array.from(text).length;
+  if (glyphCount === 0) return 0;
+  return glyphCount * 6 * scale - scale; // 5px glyph + 1px space
+}
+
+/**
+ * Renders a two-line probe image — the word "CAT" above the uppercased
+ * `nonce` — as black text on a white background.
+ *
+ * Both lines are horizontally centered. Nonce characters that have no glyph
+ * in the 5x7 font are left blank.
+ *
+ * @param nonce Code to print on the second line.
+ * @returns The PNG file contents, base64-encoded.
+ */
+export function renderCatNoncePngBase64(nonce: string): string {
+  const scale = 12;
+  const pad = 18;
+  const gap = 18;
+  const lineHeight = 7 * scale;
+
+  const lines = ["CAT", nonce.toUpperCase()];
+  const lineWidths = lines.map((line) => measureTextWidthPx(line, scale));
+
+  const width = Math.max(...lineWidths) + pad * 2;
+  const height = pad * 2 + lineHeight + gap + lineHeight;
+
+  // Fill every byte with 255: opaque white background.
+  const buf = Buffer.alloc(width * height * 4, 255);
+  const black = { r: 0, g: 0, b: 0 };
+
+  lines.forEach((text, index) => {
+    drawText({
+      buf,
+      width,
+      x: Math.floor((width - lineWidths[index]) / 2),
+      y: pad + index * (lineHeight + gap),
+      text,
+      scale,
+      color: black,
+    });
+  });
+
+  return encodePngRgba(buf, width, height).toString("base64");
+}
diff --git a/src/gateway/protocol/schema.ts b/src/gateway/protocol/schema.ts
index acceefe46..784370791 100644
--- a/src/gateway/protocol/schema.ts
+++ b/src/gateway/protocol/schema.ts
@@ -225,6 +225,7 @@ export const AgentParamsSchema = Type.Object(
     sessionKey: Type.Optional(Type.String()),
     thinking: Type.Optional(Type.String()),
     deliver: Type.Optional(Type.Boolean()),
+    attachments: Type.Optional(Type.Array(Type.Unknown())),
     provider: Type.Optional(Type.String()),
     timeout: Type.Optional(Type.Integer({ minimum: 0 })),
     lane: Type.Optional(Type.String()),
diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts
index 9ee67c9fe..3184539be 100644
--- a/src/gateway/server-methods/agent.ts
+++ b/src/gateway/server-methods/agent.ts
@@ -23,6 +23,7 @@ import {
   isWhatsAppGroupJid,
   normalizeWhatsAppTarget,
 } from "../../whatsapp/normalize.js";
+import { parseMessageWithAttachments } from "../chat-attachments.js";
 import {
   type AgentWaitParams,
   ErrorCodes,
@@ -57,6 +58,12 @@ export const agentHandlers: GatewayRequestHandlers = {
       sessionKey?: string;
       thinking?: string;
       deliver?: boolean;
+      attachments?: Array<{
+        type?: string;
+        mimeType?: string;
+        fileName?: string;
+        content?: unknown;
+      }>;
       provider?: string;
       lane?: string;
       extraSystemPrompt?: string;
@@ -73,7 +80,45 @@ export const agentHandlers: GatewayRequestHandlers = {
       });
       return;
     }
-    const message = request.message.trim();
+    const normalizedAttachments =
+      request.attachments
+        ?.map((a) => ({
+          type: typeof a?.type === "string" ? a.type : undefined,
+          mimeType: typeof a?.mimeType === "string" ? a.mimeType : undefined,
+          fileName: typeof a?.fileName === "string" ? a.fileName : undefined,
+          content:
+            typeof a?.content === "string"
+              ? a.content
+              : ArrayBuffer.isView(a?.content)
+                ? Buffer.from(
+                    a.content.buffer,
+                    a.content.byteOffset,
+                    a.content.byteLength,
+                  ).toString("base64")
+                : undefined,
+        }))
+        .filter((a) => a.content) ?? [];
+
+    let message = request.message.trim();
+    let images: Array<{ type: "image"; data: string; mimeType: string }> = [];
+    if (normalizedAttachments.length > 0) {
+      try {
+        const parsed = await parseMessageWithAttachments(
+          message,
+          normalizedAttachments,
+          { maxBytes: 5_000_000, log: context.logGateway },
+        );
+        message = parsed.message.trim();
+        images = parsed.images;
+      } catch (err) {
+        respond(
+          false,
+          undefined,
+          errorShape(ErrorCodes.INVALID_REQUEST, String(err)),
+        );
+        return;
+      }
+    }
     const rawProvider =
       typeof request.provider === "string" ? request.provider.trim() : "";
     if (rawProvider) {
@@ -275,6 +320,7 @@ export const agentHandlers: GatewayRequestHandlers = {
     void agentCommand(
       {
         message,
+        images,
         to: sanitizedTo,
         sessionId: resolvedSessionId,
         sessionKey: requestedSessionKey,
diff --git a/src/gateway/server.agent.test.ts b/src/gateway/server.agent.test.ts
index b1807cc50..682f76969 100644
--- a/src/gateway/server.agent.test.ts
+++ b/src/gateway/server.agent.test.ts
@@ -21,6 +21,9 @@ import {
 
 installGatewayTestHooks();
 
+// Base64 of a minimal 1x1 PNG, used as the attachment payload in the
+// image-forwarding test below.
+const BASE_IMAGE_PNG =
+  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X3mIAAAAASUVORK5CYII=";
+
 function expectProviders(call: Record<string, unknown>, provider: string) {
   expect(call.provider).toBe(provider);
   expect(call.messageProvider).toBe(provider);
@@ -111,6 +114,58 @@ describe("gateway server agent", () => {
     await server.close();
   });
 
+  // Verifies that an `agent` request carrying a PNG attachment reaches
+  // agentCommand with the image in `images[]` and the message text unchanged.
+  test("agent forwards image attachments as images[]", async () => {
+    // Seed a session store containing a "main" entry
+    // (presumably so the sessionKey below resolves — confirm against the handler).
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-"));
+    testState.sessionStorePath = path.join(dir, "sessions.json");
+    await fs.writeFile(
+      testState.sessionStorePath,
+      JSON.stringify(
+        {
+          main: {
+            sessionId: "sess-main-images",
+            updatedAt: Date.now(),
+          },
+        },
+        null,
+        2,
+      ),
+      "utf-8",
+    );
+
+    const { server, ws } = await startServerWithClient();
+    await connectOk(ws);
+
+    // Send the attachment as a base64 string with an explicit PNG mime type.
+    const res = await rpcReq(ws, "agent", {
+      message: "what is in the image?",
+      sessionKey: "main",
+      attachments: [
+        {
+          mimeType: "image/png",
+          fileName: "tiny.png",
+          content: BASE_IMAGE_PNG,
+        },
+      ],
+      idempotencyKey: "idem-agent-attachments",
+    });
+    expect(res.ok).toBe(true);
+
+    // Inspect the most recent agentCommand invocation recorded by the mock.
+    const spy = vi.mocked(agentCommand);
+    const call = spy.mock.calls.at(-1)?.[0] as Record<string, unknown>;
+    expect(call.sessionKey).toBe("main");
+    expectProviders(call, "webchat");
+    expect(call.message).toBe("what is in the image?");
+
+    // Exactly one image is forwarded, with the base64 payload passed through as-is.
+    const images = call.images as Array<Record<string, unknown>>;
+    expect(Array.isArray(images)).toBe(true);
+    expect(images.length).toBe(1);
+    expect(images[0]?.type).toBe("image");
+    expect(images[0]?.mimeType).toBe("image/png");
+    expect(images[0]?.data).toBe(BASE_IMAGE_PNG);
+
+    ws.close();
+    await server.close();
+  });
+
   test("agent falls back to whatsapp when delivery requested and no last provider exists", async () => {
     testState.allowFrom = ["+1555"];
     const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-"));