diff --git a/AGENTS.md b/AGENTS.md
index 84ad34145..4fb2f17b1 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -34,7 +34,7 @@
 - Framework: Vitest with V8 coverage thresholds (70% lines/branches/functions/statements).
 - Naming: match source names with `*.test.ts`; e2e in `*.e2e.test.ts`.
 - Run `pnpm test` (or `pnpm test:coverage`) before pushing when you touch logic.
-- Live tests: `LIVE=1 pnpm test:live` (real keys; skipped by default). Docker runners: `scripts/test-live-models-docker.sh`, `scripts/test-live-gateway-models-docker.sh`.
+- Live tests (real keys): `CLAWDBOT_LIVE_TEST=1 pnpm test:live` (Clawdbot-only) or `LIVE=1 pnpm test:live` (includes provider live tests). Docker: `pnpm test:docker:live-models`, `pnpm test:docker:live-gateway`. Onboarding Docker E2E: `pnpm test:docker:onboard`.
 - Full kit + what’s covered: `docs/testing.md`.
 - Pure test additions/fixes generally do **not** need a changelog entry unless they alter user-facing behavior or the user asks for one.
 - Mobile: before using a simulator, check for connected real devices (iOS + Android) and prefer them when available.
diff --git a/docs/testing.md b/docs/testing.md
index d9c9b258d..c72707167 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -15,7 +15,8 @@ Clawdbot has three Vitest suites (unit, e2e, live) plus a couple Docker helpers
 - Full gate (what we expect before push): `pnpm lint && pnpm build && pnpm test`
 - Coverage gate: `pnpm test:coverage`
 - E2E suite: `pnpm test:e2e`
-- Live suite (opt-in): `LIVE=1 pnpm test:live`
+- Live suite (opt-in, Clawdbot only): `CLAWDBOT_LIVE_TEST=1 pnpm test:live`
+- Live suite (opt-in, includes provider live tests too): `LIVE=1 pnpm test:live`
 
 ## Test suites (what runs where)
 
@@ -38,7 +39,7 @@ Clawdbot has three Vitest suites (unit, e2e, live) plus a couple Docker helpers
 - Command: `pnpm test:live`
 - Config: `vitest.live.config.ts`
 - Files: `src/**/*.live.test.ts`
-- Default: **skipped** unless `LIVE=1` (or `CLAWDBOT_LIVE_TEST=1`)
+- Default: **skipped** unless `CLAWDBOT_LIVE_TEST=1` or `LIVE=1`
 - Scope: “does this provider/model actually work today with real creds”.
 
 ## Live: model smoke (profile keys)
@@ -57,6 +58,7 @@ Two layers:
 2. Gateway + dev agent smoke (what “@clawdbot” actually does):
    - Test: `src/gateway/gateway-models.profiles.live.test.ts`
    - Goal: spin up an in-process gateway, create/patch a `agent:dev:*` session, iterate models-with-keys, and assert “meaningful” responses.
+   - Covers providers present in your `models.json`/config (e.g. OpenAI, Anthropic, Google Gemini, `google-antigravity`) as long as a key/profile is available.
    - Selection:
      - `CLAWDBOT_LIVE_GATEWAY=1`
      - `CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1` (scan all discovered models with available keys)
@@ -74,18 +76,23 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
 
 ## Docker runners (optional “works in Linux” checks)
 
-These run `pnpm test:live` inside the repo Docker image, mounting your local config dir and workspace:
+These run `pnpm test:live` inside the repo Docker image, mounting your local config dir and workspace (and sourcing the mounted `~/.profile` inside the container, if present):
 
-- Direct models: `scripts/test-live-models-docker.sh`
-- Gateway + dev agent: `scripts/test-live-gateway-models-docker.sh`
+- Direct models: `pnpm test:docker:live-models` (script: `scripts/test-live-models-docker.sh`)
+- Gateway + dev agent: `pnpm test:docker:live-gateway` (script: `scripts/test-live-gateway-models-docker.sh`)
 
 Useful env vars:
 
 - `CLAWDBOT_CONFIG_DIR=...` (default: `~/.clawdbot`) mounted to `/home/node/.clawdbot`
 - `CLAWDBOT_WORKSPACE_DIR=...` (default: `~/clawd`) mounted to `/home/node/clawd`
+- `CLAWDBOT_PROFILE_FILE=...` (default: `~/.profile`) mounted to `/home/node/.profile` and sourced before running tests
 - `CLAWDBOT_LIVE_GATEWAY_MODELS=...` / `CLAWDBOT_LIVE_MODELS=...` to narrow the run
+- `CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS=1` to ensure creds come from the profile store (not env)
 
 ## Docs sanity
 
 Run docs checks after doc edits: `pnpm docs:list`.
 
+## Offline regression (CI-safe)
+
+- Gateway tool calling (mock OpenAI, real gateway + agent loop): `src/gateway/gateway.tool-calling.mock-openai.test.ts`
diff --git a/package.json b/package.json
index 483b4761d..e34a1c18b 100644
--- a/package.json
+++ b/package.json
@@ -97,6 +97,8 @@
     "test:e2e": "vitest run --config vitest.e2e.config.ts",
     "test:live": "vitest run --config vitest.live.config.ts",
     "test:docker:onboard": "bash scripts/e2e/onboard-docker.sh",
+    "test:docker:live-models": "bash scripts/test-live-models-docker.sh",
+    "test:docker:live-gateway": "bash scripts/test-live-gateway-models-docker.sh",
     "test:docker:qr": "bash scripts/e2e/qr-import-docker.sh",
     "test:docker:doctor-switch": "bash scripts/e2e/doctor-install-switch-docker.sh",
     "protocol:gen": "tsx scripts/protocol-gen.ts",
diff --git a/scripts/test-live-gateway-models-docker.sh b/scripts/test-live-gateway-models-docker.sh
index b30883eec..e82c35a90 100755
--- a/scripts/test-live-gateway-models-docker.sh
+++ b/scripts/test-live-gateway-models-docker.sh
@@ -19,7 +19,7 @@ echo "==> Run gateway live model tests (profile keys)"
 docker run --rm -t \
   --entrypoint bash \
   -e HOME=/home/node \
-  -e LIVE=1 \
+  -e CLAWDBOT_LIVE_TEST=1 \
   -e CLAWDBOT_LIVE_GATEWAY=1 \
   -e CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1 \
   -e CLAWDBOT_LIVE_GATEWAY_MODELS="${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}" \
@@ -27,4 +27,4 @@ docker run --rm -t \
   -v "$WORKSPACE_DIR":/home/node/clawd \
   "${PROFILE_MOUNT[@]}" \
   "$IMAGE_NAME" \
-  -lc "cd /app && pnpm test:live"
+  -lc '[ -f "$HOME/.profile" ] && source "$HOME/.profile" || true; set -euo pipefail; cd /app && pnpm test:live'
diff --git a/scripts/test-live-models-docker.sh b/scripts/test-live-models-docker.sh
index 7fdd48372..c3cfd986e 100755
--- a/scripts/test-live-models-docker.sh
+++ b/scripts/test-live-models-docker.sh
@@ -19,11 +19,11 @@ echo "==> Run live model tests (profile keys)"
 docker run --rm -t \
   --entrypoint bash \
   -e HOME=/home/node \
-  -e LIVE=1 \
+  -e CLAWDBOT_LIVE_TEST=1 \
   -e CLAWDBOT_LIVE_ALL_MODELS=1 \
-  -e CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS=1 \
+  -e CLAWDBOT_LIVE_MODELS="${CLAWDBOT_LIVE_MODELS:-all}" \
   -v "$CONFIG_DIR":/home/node/.clawdbot \
   -v "$WORKSPACE_DIR":/home/node/clawd \
   "${PROFILE_MOUNT[@]}" \
   "$IMAGE_NAME" \
-  -lc "cd /app && pnpm test:live"
+  -lc '[ -f "$HOME/.profile" ] && source "$HOME/.profile" || true; set -euo pipefail; cd /app && pnpm test:live'