diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6a80d80..e013f902e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.clawd.bot - **BREAKING:** Envelope and system event timestamps now default to host-local time (was UTC) so agents don’t have to constantly convert. ### Fixes +- Media: accept MEDIA paths with spaces/tilde and prefer the message tool hint for image replies. - Config: avoid stack traces for invalid configs and log the config path. - CLI: read Codex CLI account_id for workspace billing. (#1422) Thanks @aj47. - Doctor: avoid recreating WhatsApp config when only legacy routing keys remain. (#900) diff --git a/apps/macos/Sources/Clawdbot/ExecApprovalsSocket.swift b/apps/macos/Sources/Clawdbot/ExecApprovalsSocket.swift index dcbc0a9bb..b5b74bec8 100644 --- a/apps/macos/Sources/Clawdbot/ExecApprovalsSocket.swift +++ b/apps/macos/Sources/Clawdbot/ExecApprovalsSocket.swift @@ -215,36 +215,15 @@ enum ExecApprovalsPromptPresenter { let alert = NSAlert() alert.alertStyle = .warning alert.messageText = "Allow this command?" - - var details = "Clawdbot wants to run:\n\n\(request.command)" - let trimmedCwd = request.cwd?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if !trimmedCwd.isEmpty { - details += "\n\nWorking directory:\n\(trimmedCwd)" - } - let trimmedAgent = request.agentId?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if !trimmedAgent.isEmpty { - details += "\n\nAgent:\n\(trimmedAgent)" - } - let trimmedPath = request.resolvedPath?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if !trimmedPath.isEmpty { - details += "\n\nExecutable:\n\(trimmedPath)" - } - let trimmedHost = request.host?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if !trimmedHost.isEmpty { - details += "\n\nHost:\n\(trimmedHost)" - } - if let security = request.security?.trimmingCharacters(in: .whitespacesAndNewlines), !security.isEmpty { - details += "\n\nSecurity:\n\(security)" - } - if let ask = request.ask?.trimmingCharacters(in: .whitespacesAndNewlines), !ask.isEmpty { - details += "\nAsk mode:\n\(ask)" - } - details += "\n\nThis runs on this machine." - alert.informativeText = details + alert.informativeText = "Review the command details before allowing." + alert.accessoryView = self.buildAccessoryView(request) alert.addButton(withTitle: "Allow Once") alert.addButton(withTitle: "Always Allow") alert.addButton(withTitle: "Don't Allow") + if #available(macOS 11.0, *), alert.buttons.indices.contains(2) { + alert.buttons[2].hasDestructiveAction = true + } switch alert.runModal() { case .alertFirstButtonReturn: @@ -255,6 +234,110 @@ enum ExecApprovalsPromptPresenter { return .deny } } + + @MainActor + private static func buildAccessoryView(_ request: ExecApprovalPromptRequest) -> NSView { + let stack = NSStackView() + stack.orientation = .vertical + stack.spacing = 8 + stack.alignment = .leading + + let commandTitle = NSTextField(labelWithString: "Command") + commandTitle.font = NSFont.boldSystemFont(ofSize: NSFont.systemFontSize) + stack.addArrangedSubview(commandTitle) + + let commandText = NSTextView() + commandText.isEditable = false + commandText.isSelectable = true + commandText.drawsBackground = true + commandText.backgroundColor = NSColor.textBackgroundColor + commandText.font = NSFont.monospacedSystemFont(ofSize: NSFont.systemFontSize, weight: .regular) + commandText.string = request.command + commandText.textContainerInset = NSSize(width: 6, height: 6) + commandText.textContainer?.lineFragmentPadding = 0 + commandText.textContainer?.widthTracksTextView = true + commandText.isHorizontallyResizable = false + commandText.isVerticallyResizable = false + + let commandScroll = NSScrollView() + commandScroll.borderType = .lineBorder + commandScroll.hasVerticalScroller = false + commandScroll.hasHorizontalScroller = false + commandScroll.documentView = commandText + commandScroll.translatesAutoresizingMaskIntoConstraints = false + commandScroll.widthAnchor.constraint(lessThanOrEqualToConstant: 440).isActive = true + commandScroll.heightAnchor.constraint(greaterThanOrEqualToConstant: 56).isActive = true + stack.addArrangedSubview(commandScroll) + + let contextTitle = NSTextField(labelWithString: "Context") + contextTitle.font = NSFont.boldSystemFont(ofSize: NSFont.systemFontSize) + stack.addArrangedSubview(contextTitle) + + let contextStack = NSStackView() + contextStack.orientation = .vertical + contextStack.spacing = 4 + contextStack.alignment = .leading + + let trimmedCwd = request.cwd?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if !trimmedCwd.isEmpty { + self.addDetailRow(title: "Working directory", value: trimmedCwd, to: contextStack) + } + let trimmedAgent = request.agentId?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if !trimmedAgent.isEmpty { + self.addDetailRow(title: "Agent", value: trimmedAgent, to: contextStack) + } + let trimmedPath = request.resolvedPath?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if !trimmedPath.isEmpty { + self.addDetailRow(title: "Executable", value: trimmedPath, to: contextStack) + } + let trimmedHost = request.host?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if !trimmedHost.isEmpty { + self.addDetailRow(title: "Host", value: trimmedHost, to: contextStack) + } + if let security = request.security?.trimmingCharacters(in: .whitespacesAndNewlines), !security.isEmpty { + self.addDetailRow(title: "Security", value: security, to: contextStack) + } + if let ask = request.ask?.trimmingCharacters(in: .whitespacesAndNewlines), !ask.isEmpty { + self.addDetailRow(title: "Ask mode", value: ask, to: contextStack) + } + + if contextStack.arrangedSubviews.isEmpty { + let empty = NSTextField(labelWithString: "No additional context provided.") + empty.textColor = NSColor.secondaryLabelColor + empty.font = NSFont.systemFont(ofSize: NSFont.smallSystemFontSize) + contextStack.addArrangedSubview(empty) + } + + stack.addArrangedSubview(contextStack) + + let footer = NSTextField(labelWithString: "This runs on this machine.") + footer.textColor = NSColor.secondaryLabelColor + footer.font = NSFont.systemFont(ofSize: NSFont.smallSystemFontSize) + stack.addArrangedSubview(footer) + + return stack + } + + @MainActor + private static func addDetailRow(title: String, value: String, to stack: NSStackView) { + let row = NSStackView() + row.orientation = .horizontal + row.spacing = 6 + row.alignment = .firstBaseline + + let titleLabel = NSTextField(labelWithString: "\(title):") + titleLabel.font = NSFont.systemFont(ofSize: NSFont.smallSystemFontSize, weight: .semibold) + titleLabel.textColor = NSColor.secondaryLabelColor + + let valueLabel = NSTextField(labelWithString: value) + valueLabel.font = NSFont.systemFont(ofSize: NSFont.smallSystemFontSize) + valueLabel.lineBreakMode = .byTruncatingMiddle + valueLabel.setContentCompressionResistancePriority(.defaultLow, for: .horizontal) + + row.addArrangedSubview(titleLabel) + row.addArrangedSubview(valueLabel) + stack.addArrangedSubview(row) + } } @MainActor diff --git a/docs/concepts/agent-workspace.md b/docs/concepts/agent-workspace.md index 33b0e174a..4033fd267 100644 --- a/docs/concepts/agent-workspace.md +++ b/docs/concepts/agent-workspace.md @@ -140,6 +140,9 @@ workspace lives). ### 1) Initialize the repo +If git is installed, brand-new workspaces are initialized automatically. If this +workspace is not already a repo, run: + ```bash cd ~/clawd git init diff --git a/docs/start/clawd.md b/docs/start/clawd.md index b68bdee7c..dd1224a50 100644 --- a/docs/start/clawd.md +++ b/docs/start/clawd.md @@ -95,7 +95,7 @@ Clawd reads operating instructions and “memory” from its workspace directory By default, Clawdbot uses `~/clawd` as the agent workspace, and will create it (plus starter `AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`) automatically on setup/first agent run. `BOOTSTRAP.md` is only created when the workspace is brand new (it should not come back after you delete it). -Tip: treat this folder like Clawd’s “memory” and make it a git repo (ideally private) so your `AGENTS.md` + memory files are backed up. +Tip: treat this folder like Clawd’s “memory” and make it a git repo (ideally private) so your `AGENTS.md` + memory files are backed up. If git is installed, brand-new workspaces are auto-initialized. ```bash clawdbot setup diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index f16a71759..c770b241a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -46,6 +46,7 @@ import { loadWorkspaceSkillEntries, resolveSkillsPromptForRun, } from "../../skills.js"; +import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveDefaultModelForAgent } from "../../model-selection.js"; @@ -184,6 +185,11 @@ export async function runEmbeddedAttempt( sessionId: params.sessionId, warn: makeBootstrapWarn({ sessionLabel, warn: (message) => log.warn(message) }), }); + const workspaceNotes = hookAdjustedBootstrapFiles.some( + (file) => file.name === DEFAULT_BOOTSTRAP_FILENAME && !file.missing, + ) + ? ["Reminder: commit your changes in this workspace after edits."] + : undefined; const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); @@ -314,6 +320,7 @@ export async function runEmbeddedAttempt( : undefined, skillsPrompt, docsPath: docsPath ?? undefined, + workspaceNotes, reactionGuidance, promptMode, runtimeInfo, diff --git a/src/agents/pi-embedded-runner/system-prompt.ts b/src/agents/pi-embedded-runner/system-prompt.ts index 196458df9..cde0f0a15 100644 --- a/src/agents/pi-embedded-runner/system-prompt.ts +++ b/src/agents/pi-embedded-runner/system-prompt.ts @@ -20,6 +20,7 @@ export function buildEmbeddedSystemPrompt(params: { level: "minimal" | "extensive"; channel: string; }; + workspaceNotes?: string[]; /** Controls which hardcoded sections to include. Defaults to "full". */ promptMode?: PromptMode; runtimeInfo: { @@ -54,6 +55,7 @@ export function buildEmbeddedSystemPrompt(params: { heartbeatPrompt: params.heartbeatPrompt, skillsPrompt: params.skillsPrompt, docsPath: params.docsPath, + workspaceNotes: params.workspaceNotes, reactionGuidance: params.reactionGuidance, promptMode: params.promptMode, runtimeInfo: params.runtimeInfo, diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts index b2074d866..1f515e113 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts @@ -226,24 +226,27 @@ export function handleMessageEnd( ); } else { ctx.state.lastBlockReplyText = text; - const { - text: cleanedText, - mediaUrls, - audioAsVoice, - replyToId, - replyToTag, - replyToCurrent, - } = parseReplyDirectives(text); - // Emit if there's content OR audioAsVoice flag (to propagate the flag). - if (cleanedText || (mediaUrls && mediaUrls.length > 0) || audioAsVoice) { - void onBlockReply({ + const splitResult = ctx.consumeReplyDirectives(text, { final: true }); + if (splitResult) { + const { text: cleanedText, - mediaUrls: mediaUrls?.length ? mediaUrls : undefined, + mediaUrls, audioAsVoice, replyToId, replyToTag, replyToCurrent, - }); + } = splitResult; + // Emit if there's content OR audioAsVoice flag (to propagate the flag). + if (cleanedText || (mediaUrls && mediaUrls.length > 0) || audioAsVoice) { + void onBlockReply({ + text: cleanedText, + mediaUrls: mediaUrls?.length ? mediaUrls : undefined, + audioAsVoice, + replyToId, + replyToTag, + replyToCurrent, + }); + } } } } @@ -254,6 +257,30 @@ export function handleMessageEnd( ctx.emitReasoningStream(rawThinking); } + if (ctx.state.blockReplyBreak === "text_end" && onBlockReply) { + const tailResult = ctx.consumeReplyDirectives("", { final: true }); + if (tailResult) { + const { + text: cleanedText, + mediaUrls, + audioAsVoice, + replyToId, + replyToTag, + replyToCurrent, + } = tailResult; + if (cleanedText || (mediaUrls && mediaUrls.length > 0) || audioAsVoice) { + void onBlockReply({ + text: cleanedText, + mediaUrls: mediaUrls?.length ? mediaUrls : undefined, + audioAsVoice, + replyToId, + replyToTag, + replyToCurrent, + }); + } + } + } + ctx.state.deltaBuffer = ""; ctx.state.blockBuffer = ""; ctx.blockChunker?.reset(); diff --git a/src/agents/pi-embedded-subscribe.handlers.types.ts b/src/agents/pi-embedded-subscribe.handlers.types.ts index 94a107961..4a464c5e2 100644 --- a/src/agents/pi-embedded-subscribe.handlers.types.ts +++ b/src/agents/pi-embedded-subscribe.handlers.types.ts @@ -1,6 +1,7 @@ import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core"; import type { ReasoningLevel } from "../auto-reply/thinking.js"; +import type { ReplyDirectiveParseResult } from "../auto-reply/reply/reply-directives.js"; import type { InlineCodeState } from "../markdown/code-spans.js"; import type { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js"; import type { MessagingToolSend } from "./pi-embedded-messaging.js"; @@ -77,6 +78,10 @@ export type EmbeddedPiSubscribeContext = { emitBlockChunk: (text: string) => void; flushBlockReplyBuffer: () => void; emitReasoningStream: (text: string) => void; + consumeReplyDirectives: ( + text: string, + options?: { final?: boolean }, + ) => ReplyDirectiveParseResult | null; resetAssistantMessageState: (nextAssistantTextBaseline: number) => void; resetForCompactionRetry: () => void; finalizeAssistantTexts: (args: { diff --git a/src/agents/pi-embedded-subscribe.reply-tags.test.ts b/src/agents/pi-embedded-subscribe.reply-tags.test.ts new file mode 100644 index 000000000..5243c8488 --- /dev/null +++ b/src/agents/pi-embedded-subscribe.reply-tags.test.ts @@ -0,0 +1,106 @@ +import type { AssistantMessage } from "@mariozechner/pi-ai"; +import { describe, expect, it, vi } from "vitest"; +import { subscribeEmbeddedPiSession } from "./pi-embedded-subscribe.js"; + +type StubSession = { + subscribe: (fn: (evt: unknown) => void) => () => void; +}; + +describe("subscribeEmbeddedPiSession reply tags", () => { + it("carries reply_to_current across tag-only block chunks", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const onBlockReply = vi.fn(); + + subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run", + onBlockReply, + blockReplyBreak: "text_end", + blockReplyChunking: { + minChars: 1, + maxChars: 50, + breakPreference: "newline", + }, + }); + + handler?.({ type: "message_start", message: { role: "assistant" } }); + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { + type: "text_delta", + delta: "[[reply_to_current]]\nHello", + }, + }); + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { type: "text_end" }, + }); + + const assistantMessage = { + role: "assistant", + content: [{ type: "text", text: "[[reply_to_current]]\nHello" }], + } as AssistantMessage; + handler?.({ type: "message_end", message: assistantMessage }); + + expect(onBlockReply).toHaveBeenCalledTimes(1); + const payload = onBlockReply.mock.calls[0]?.[0]; + expect(payload?.text).toBe("Hello"); + expect(payload?.replyToCurrent).toBe(true); + expect(payload?.replyToTag).toBe(true); + }); + + it("flushes trailing directive tails on stream end", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const onBlockReply = vi.fn(); + + subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run", + onBlockReply, + blockReplyBreak: "text_end", + blockReplyChunking: { + minChars: 1, + maxChars: 50, + breakPreference: "newline", + }, + }); + + handler?.({ type: "message_start", message: { role: "assistant" } }); + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { type: "text_delta", delta: "Hello [[" }, + }); + handler?.({ + type: "message_update", + message: { role: "assistant" }, + assistantMessageEvent: { type: "text_end" }, + }); + + const assistantMessage = { + role: "assistant", + content: [{ type: "text", text: "Hello [[" }], + } as AssistantMessage; + handler?.({ type: "message_end", message: assistantMessage }); + + expect(onBlockReply).toHaveBeenCalledTimes(2); + expect(onBlockReply.mock.calls[0]?.[0]?.text).toBe("Hello"); + expect(onBlockReply.mock.calls[1]?.[0]?.text).toBe("[["); + }); +}); diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index cb6d81be6..a4a4b906a 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -1,4 +1,5 @@ import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js"; +import { createStreamingDirectiveAccumulator } from "../auto-reply/reply/streaming-directives.js"; import { formatToolAggregate } from "../auto-reply/tool-meta.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import type { InlineCodeState } from "../markdown/code-spans.js"; @@ -75,11 +76,13 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar const messagingToolSentTargets = state.messagingToolSentTargets; const pendingMessagingTexts = state.pendingMessagingTexts; const pendingMessagingTargets = state.pendingMessagingTargets; + const replyDirectiveAccumulator = createStreamingDirectiveAccumulator(); const resetAssistantMessageState = (nextAssistantTextBaseline: number) => { state.deltaBuffer = ""; state.blockBuffer = ""; blockChunker?.reset(); + replyDirectiveAccumulator.reset(); state.blockState.thinking = false; state.blockState.final = false; state.blockState.inlineCode = createInlineCodeState(); @@ -374,7 +377,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar assistantTexts.push(chunk); rememberAssistantText(chunk); if (!params.onBlockReply) return; - const splitResult = parseReplyDirectives(chunk); + const splitResult = replyDirectiveAccumulator.consume(chunk); + if (!splitResult) return; const { text: cleanedText, mediaUrls, @@ -395,6 +399,9 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar }); }; + const consumeReplyDirectives = (text: string, options?: { final?: boolean }) => + replyDirectiveAccumulator.consume(text, options); + const flushBlockReplyBuffer = () => { if (!params.onBlockReply) return; if (blockChunker?.hasBuffered()) { @@ -447,6 +454,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar emitBlockChunk, flushBlockReplyBuffer, emitReasoningStream, + consumeReplyDirectives, resetAssistantMessageState, resetForCompactionRetry, finalizeAssistantTexts, diff --git a/src/agents/system-prompt.test.ts b/src/agents/system-prompt.test.ts index b5fe28556..e37a17008 100644 --- a/src/agents/system-prompt.test.ts +++ b/src/agents/system-prompt.test.ts @@ -115,6 +115,15 @@ describe("buildAgentSystemPrompt", () => { ); }); + it("includes workspace notes when provided", () => { + const prompt = buildAgentSystemPrompt({ + workspaceDir: "/tmp/clawd", + workspaceNotes: ["Reminder: commit your changes in this workspace after edits."], + }); + + expect(prompt).toContain("Reminder: commit your changes in this workspace after edits."); + }); + it("includes user time when provided (12-hour)", () => { const prompt = buildAgentSystemPrompt({ workspaceDir: "/tmp/clawd", diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 6a20391c0..9716fed0d 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -148,6 +148,7 @@ export function buildAgentSystemPrompt(params: { skillsPrompt?: string; heartbeatPrompt?: string; docsPath?: string; + workspaceNotes?: string[]; /** Controls which hardcoded sections to include. Defaults to "full". */ promptMode?: PromptMode; runtimeInfo?: { @@ -327,6 +328,7 @@ export function buildAgentSystemPrompt(params: { isMinimal, readToolName, }); + const workspaceNotes = (params.workspaceNotes ?? []).map((note) => note.trim()).filter(Boolean); // For "none" mode, return just the basic identity line if (promptMode === "none") { @@ -403,6 +405,7 @@ export function buildAgentSystemPrompt(params: { "## Workspace", `Your working directory is: ${params.workspaceDir}`, "Treat this directory as the single global workspace for file operations unless explicitly instructed otherwise.", + ...workspaceNotes, "", ...docsSection, params.sandboxInfo?.enabled ? "## Sandbox" : "", diff --git a/src/agents/workspace.test.ts b/src/agents/workspace.test.ts index e14022fde..8c4f5a0de 100644 --- a/src/agents/workspace.test.ts +++ b/src/agents/workspace.test.ts @@ -2,6 +2,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { describe, expect, it } from "vitest"; +import { runCommandWithTimeout } from "../process/exec.js"; import type { WorkspaceBootstrapFile } from "./workspace.js"; import { DEFAULT_AGENTS_FILENAME, @@ -40,6 +41,34 @@ describe("ensureAgentWorkspace", () => { await expect(fs.stat(bootstrap)).resolves.toBeDefined(); }); + it("initializes a git repo for brand-new workspaces when git is available", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-ws-")); + const nested = path.join(dir, "nested"); + const gitAvailable = await runCommandWithTimeout(["git", "--version"], { timeoutMs: 2_000 }) + .then((res) => res.code === 0) + .catch(() => false); + if (!gitAvailable) return; + + await ensureAgentWorkspace({ + dir: nested, + ensureBootstrapFiles: true, + }); + + await expect(fs.stat(path.join(nested, ".git"))).resolves.toBeDefined(); + }); + + it("does not initialize git when workspace already exists", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-ws-")); + await fs.writeFile(path.join(dir, "AGENTS.md"), "custom", "utf-8"); + + await ensureAgentWorkspace({ + dir, + ensureBootstrapFiles: true, + }); + + await expect(fs.stat(path.join(dir, ".git"))).rejects.toBeDefined(); + }); + it("does not overwrite existing AGENTS.md", async () => { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-ws-")); const agentsPath = path.join(dir, "AGENTS.md"); diff --git a/src/agents/workspace.ts b/src/agents/workspace.ts index cf1de7daf..6732069a9 100644 --- a/src/agents/workspace.ts +++ b/src/agents/workspace.ts @@ -4,6 +4,7 @@ import path from "node:path"; import { fileURLToPath } from "node:url"; import { isSubagentSessionKey } from "../routing/session-key.js"; +import { runCommandWithTimeout } from "../process/exec.js"; import { resolveUserPath } from "../utils.js"; export function resolveDefaultAgentWorkspaceDir( @@ -81,6 +82,35 @@ async function writeFileIfMissing(filePath: string, content: string) { } } +async function hasGitRepo(dir: string): Promise { + try { + await fs.stat(path.join(dir, ".git")); + return true; + } catch { + return false; + } +} + +async function isGitAvailable(): Promise { + try { + const result = await runCommandWithTimeout(["git", "--version"], { timeoutMs: 2_000 }); + return result.code === 0; + } catch { + return false; + } +} + +async function ensureGitRepo(dir: string, isBrandNewWorkspace: boolean) { + if (!isBrandNewWorkspace) return; + if (await hasGitRepo(dir)) return; + if (!(await isGitAvailable())) return; + try { + await runCommandWithTimeout(["git", "init"], { cwd: dir, timeoutMs: 10_000 }); + } catch { + // Ignore git init failures; workspace creation should still succeed. + } +} + export async function ensureAgentWorkspace(params?: { dir?: string; ensureBootstrapFiles?: boolean; @@ -140,6 +170,7 @@ export async function ensureAgentWorkspace(params?: { if (isBrandNewWorkspace) { await writeFileIfMissing(bootstrapPath, bootstrapTemplate); } + await ensureGitRepo(dir, isBrandNewWorkspace); return { dir, diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 0a8de31c0..a2adbf312 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -250,7 +250,7 @@ export async function runPreparedReply( const prefixedBody = [threadStarterNote, prefixedBodyBase].filter(Boolean).join("\n\n"); const mediaNote = buildInboundMediaNote(ctx); const mediaReplyHint = mediaNote - ? "To send an image back, add a line like: MEDIA:https://example.com/image.jpg (no spaces). Keep caption in the text body." + ? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:/path or MEDIA:https://example.com/image.jpg (spaces ok, quote if needed). Keep caption in the text body." : undefined; let prefixedCommandBody = mediaNote ? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim() diff --git a/src/auto-reply/reply/streaming-directives.test.ts b/src/auto-reply/reply/streaming-directives.test.ts new file mode 100644 index 000000000..02d32ded8 --- /dev/null +++ b/src/auto-reply/reply/streaming-directives.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "vitest"; +import { createStreamingDirectiveAccumulator } from "./streaming-directives.js"; + +describe("createStreamingDirectiveAccumulator", () => { + it("stashes reply_to_current until a renderable chunk arrives", () => { + const accumulator = createStreamingDirectiveAccumulator(); + + expect(accumulator.consume("[[reply_to_current]]")).toBeNull(); + + const result = accumulator.consume("Hello"); + expect(result?.text).toBe("Hello"); + expect(result?.replyToCurrent).toBe(true); + expect(result?.replyToTag).toBe(true); + }); + + it("handles reply tags split across chunks", () => { + const accumulator = createStreamingDirectiveAccumulator(); + + expect(accumulator.consume("[[reply_to_")).toBeNull(); + + const result = accumulator.consume("current]] Yo"); + expect(result?.text).toBe("Yo"); + expect(result?.replyToCurrent).toBe(true); + expect(result?.replyToTag).toBe(true); + }); + + it("propagates explicit reply ids across chunks", () => { + const accumulator = createStreamingDirectiveAccumulator(); + + expect(accumulator.consume("[[reply_to: abc-123]]")).toBeNull(); + + const result = accumulator.consume("Hi"); + expect(result?.text).toBe("Hi"); + expect(result?.replyToId).toBe("abc-123"); + expect(result?.replyToTag).toBe(true); + }); +}); diff --git a/src/auto-reply/reply/streaming-directives.ts b/src/auto-reply/reply/streaming-directives.ts new file mode 100644 index 000000000..a79e640a1 --- /dev/null +++ b/src/auto-reply/reply/streaming-directives.ts @@ -0,0 +1,124 @@ +import { splitMediaFromOutput } from "../../media/parse.js"; +import { parseInlineDirectives } from "../../utils/directive-tags.js"; +import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js"; +import type { ReplyDirectiveParseResult } from "./reply-directives.js"; + +type PendingReplyState = { + explicitId?: string; + sawCurrent: boolean; + hasTag: boolean; +}; + +type ParsedChunk = ReplyDirectiveParseResult & { + replyToExplicitId?: string; +}; + +type ConsumeOptions = { + final?: boolean; + silentToken?: string; +}; + +const splitTrailingDirective = (text: string): { text: string; tail: string } => { + const openIndex = text.lastIndexOf("[["); + if (openIndex < 0) return { text, tail: "" }; + const closeIndex = text.indexOf("]]", openIndex + 2); + if (closeIndex >= 0) return { text, tail: "" }; + return { + text: text.slice(0, openIndex), + tail: text.slice(openIndex), + }; +}; + +const parseChunk = (raw: string, options?: { silentToken?: string }): ParsedChunk => { + const split = splitMediaFromOutput(raw); + let text = split.text ?? ""; + + const replyParsed = parseInlineDirectives(text, { + stripAudioTag: false, + stripReplyTags: true, + }); + + if (replyParsed.hasReplyTag) { + text = replyParsed.text; + } + + const silentToken = options?.silentToken ?? SILENT_REPLY_TOKEN; + const isSilent = isSilentReplyText(text, silentToken); + if (isSilent) { + text = ""; + } + + return { + text, + mediaUrls: split.mediaUrls, + mediaUrl: split.mediaUrl, + replyToId: replyParsed.replyToId, + replyToExplicitId: replyParsed.replyToExplicitId, + replyToCurrent: replyParsed.replyToCurrent, + replyToTag: replyParsed.hasReplyTag, + audioAsVoice: split.audioAsVoice, + isSilent, + }; +}; + +const hasRenderableContent = (parsed: ReplyDirectiveParseResult): boolean => + Boolean(parsed.text) || + Boolean(parsed.mediaUrl) || + (parsed.mediaUrls?.length ?? 0) > 0 || + Boolean(parsed.audioAsVoice); + +export function createStreamingDirectiveAccumulator() { + let pendingTail = ""; + let pendingReply: PendingReplyState = { sawCurrent: false, hasTag: false }; + + const reset = () => { + pendingTail = ""; + pendingReply = { sawCurrent: false, hasTag: false }; + }; + + const consume = (raw: string, options: ConsumeOptions = {}): ReplyDirectiveParseResult | null => { + let combined = `${pendingTail}${raw ?? ""}`; + pendingTail = ""; + + if (!options.final) { + const split = splitTrailingDirective(combined); + combined = split.text; + pendingTail = split.tail; + } + + if (!combined) { + return null; + } + + const parsed = parseChunk(combined, { silentToken: options.silentToken }); + const hasTag = pendingReply.hasTag || parsed.replyToTag; + const sawCurrent = pendingReply.sawCurrent || parsed.replyToCurrent; + const explicitId = parsed.replyToExplicitId ?? pendingReply.explicitId; + + const combinedResult: ReplyDirectiveParseResult = { + ...parsed, + replyToId: explicitId, + replyToCurrent: sawCurrent, + replyToTag: hasTag, + }; + + if (!hasRenderableContent(combinedResult)) { + if (hasTag) { + pendingReply = { + explicitId, + sawCurrent, + hasTag, + }; + } + return null; + } + + pendingReply = { sawCurrent: false, hasTag: false }; + return combinedResult; + }; + + return { + consume, + reset, + }; +} diff --git a/src/infra/exec-approvals.ts b/src/infra/exec-approvals.ts index 7047abf89..e962ca706 100644 --- a/src/infra/exec-approvals.ts +++ b/src/infra/exec-approvals.ts @@ -360,11 +360,14 @@ function resolveExecutablePath(rawExecutable: string, cwd?: string, env?: NodeJS } const envPath = env?.PATH ?? process.env.PATH ?? ""; const entries = envPath.split(path.delimiter).filter(Boolean); + const hasExtension = process.platform === "win32" && path.extname(expanded).length > 0; const extensions = process.platform === "win32" - ? (env?.PATHEXT ?? process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM") - .split(";") - .map((ext) => ext.toLowerCase()) + ? hasExtension + ? [""] + : (env?.PATHEXT ?? process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM") + .split(";") + .map((ext) => ext.toLowerCase()) : [""]; for (const entry of entries) { for (const ext of extensions) { @@ -403,6 +406,14 @@ function normalizeMatchTarget(value: string): string { return value.replace(/\\\\/g, "/").toLowerCase(); } +function tryRealpath(value: string): string | null { + try { + return fs.realpathSync(value); + } catch { + return null; + } +} + function globToRegExp(pattern: string): RegExp { let regex = "^"; let i = 0; @@ -435,8 +446,15 @@ function matchesPattern(pattern: string, target: string): boolean { const trimmed = pattern.trim(); if (!trimmed) return false; const expanded = trimmed.startsWith("~") ? expandHome(trimmed) : trimmed; - const normalizedPattern = normalizeMatchTarget(expanded); - const normalizedTarget = normalizeMatchTarget(target); + const hasWildcard = /[*?]/.test(expanded); + let normalizedPattern = expanded; + let normalizedTarget = target; + if (process.platform === "win32" && !hasWildcard) { + normalizedPattern = tryRealpath(expanded) ?? expanded; + normalizedTarget = tryRealpath(target) ?? target; + } + normalizedPattern = normalizeMatchTarget(normalizedPattern); + normalizedTarget = normalizeMatchTarget(normalizedTarget); const regex = globToRegExp(normalizedPattern); return regex.test(normalizedTarget); } diff --git a/src/media/parse.test.ts b/src/media/parse.test.ts index f910d851c..74c1eb52d 100644 --- a/src/media/parse.test.ts +++ b/src/media/parse.test.ts @@ -9,6 +9,24 @@ describe("splitMediaFromOutput", () => { expect(result.text).toBe("Hello world"); }); + it("captures media paths with spaces", () => { + const result = splitMediaFromOutput("MEDIA:/Users/pete/My File.png"); + expect(result.mediaUrls).toEqual(["/Users/pete/My File.png"]); + expect(result.text).toBe(""); + }); + + it("captures quoted media paths with spaces", () => { + const result = splitMediaFromOutput('MEDIA:"/Users/pete/My File.png"'); + expect(result.mediaUrls).toEqual(["/Users/pete/My File.png"]); + expect(result.text).toBe(""); + }); + + it("captures tilde media paths with spaces", () => { + const result = splitMediaFromOutput("MEDIA:~/Pictures/My File.png"); + expect(result.mediaUrls).toEqual(["~/Pictures/My File.png"]); + expect(result.text).toBe(""); + }); + it("keeps audio_as_voice detection stable across calls", () => { const input = "Hello [[audio_as_voice]]"; const first = splitMediaFromOutput(input); diff --git a/src/media/parse.ts b/src/media/parse.ts index 31271ceb7..4e35d6775 100644 --- a/src/media/parse.ts +++ b/src/media/parse.ts @@ -14,11 +14,26 @@ function cleanCandidate(raw: string) { return raw.replace(/^[`"'[{(]+/, "").replace(/[`"'\\})\],]+$/, ""); } -function isValidMedia(candidate: string) { +function isValidMedia(candidate: string, opts?: { allowSpaces?: boolean }) { if (!candidate) return false; - if (candidate.length > 1024) return false; - if (/\s/.test(candidate)) return false; - return /^https?:\/\//i.test(candidate) || candidate.startsWith("/") || candidate.startsWith("./"); + if (candidate.length > 4096) return false; + if (!opts?.allowSpaces && /\s/.test(candidate)) return false; + if (/^https?:\/\//i.test(candidate)) return true; + if (candidate.startsWith("/")) return true; + if (candidate.startsWith("./")) return true; + if (candidate.startsWith("../")) return true; + if (candidate.startsWith("~")) return true; + return false; +} + +function unwrapQuoted(value: string): string | undefined { + const trimmed = value.trim(); + if (trimmed.length < 2) return undefined; + const first = trimmed[0]; + const last = trimmed[trimmed.length - 1]; + if (first !== last) return undefined; + if (first !== `"` && first !== "'" && first !== "`") return undefined; + return trimmed.slice(1, -1).trim(); } // Check if a character offset is inside any fenced code block @@ -73,18 +88,55 @@ export function splitMediaFromOutput(raw: string): { pieces.push(line.slice(cursor, start)); const payload = match[1]; - const parts = payload.split(/\s+/).filter(Boolean); + const unwrapped = unwrapQuoted(payload); + const payloadValue = unwrapped ?? payload; + const parts = unwrapped ? [unwrapped] : payload.split(/\s+/).filter(Boolean); + const mediaStartIndex = media.length; + let validCount = 0; const invalidParts: string[] = []; for (const part of parts) { const candidate = normalizeMediaSource(cleanCandidate(part)); - if (isValidMedia(candidate)) { + if (isValidMedia(candidate, unwrapped ? { allowSpaces: true } : undefined)) { media.push(candidate); hasValidMedia = true; + validCount += 1; } else { invalidParts.push(part); } } + const trimmedPayload = payloadValue.trim(); + const looksLikeLocalPath = + trimmedPayload.startsWith("/") || + trimmedPayload.startsWith("./") || + trimmedPayload.startsWith("../") || + trimmedPayload.startsWith("~") || + trimmedPayload.startsWith("file://"); + if ( + !unwrapped && + validCount === 1 && + invalidParts.length > 0 && + /\s/.test(payloadValue) && + looksLikeLocalPath + ) { + const fallback = normalizeMediaSource(cleanCandidate(payloadValue)); + if (isValidMedia(fallback, { allowSpaces: true })) { + media.splice(mediaStartIndex, media.length - mediaStartIndex, fallback); + hasValidMedia = true; + validCount = 1; + invalidParts.length = 0; + } + } + + if (!hasValidMedia) { + const fallback = normalizeMediaSource(cleanCandidate(payloadValue)); + if (isValidMedia(fallback, { allowSpaces: true })) { + media.push(fallback); + hasValidMedia = true; + invalidParts.length = 0; + } + } + if (hasValidMedia && invalidParts.length > 0) { pieces.push(invalidParts.join(" ")); } diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index 7833ded93..0e10b9b54 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -2213,6 +2213,47 @@ describe("createTelegramBot", () => { ).toBe(false); }); + it("blocks native DM commands for unpaired users", async () => { + onSpy.mockReset(); + sendMessageSpy.mockReset(); + commandSpy.mockReset(); + const replySpy = replyModule.__replySpy as unknown as ReturnType; + replySpy.mockReset(); + + loadConfig.mockReturnValue({ + commands: { native: true }, + channels: { + telegram: { + dmPolicy: "pairing", + }, + }, + }); + readTelegramAllowFromStore.mockResolvedValueOnce([]); + + createTelegramBot({ token: "tok" }); + const handler = commandSpy.mock.calls.find((call) => call[0] === "status")?.[1] as + | ((ctx: Record) => Promise) + | undefined; + if (!handler) throw new Error("status command handler missing"); + + await handler({ + message: { + chat: { id: 12345, type: "private" }, + from: { id: 12345, username: "testuser" }, + text: "/status", + date: 1736380800, + message_id: 42, + }, + match: "", + }); + + expect(replySpy).not.toHaveBeenCalled(); + expect(sendMessageSpy).toHaveBeenCalledWith( + 12345, + "You are not authorized to use this command.", + ); + }); + it("streams tool summaries for native slash commands", async () => { onSpy.mockReset(); sendMessageSpy.mockReset(); diff --git a/src/utils/directive-tags.ts b/src/utils/directive-tags.ts index 040e356bc..a58b143dc 100644 --- a/src/utils/directive-tags.ts +++ b/src/utils/directive-tags.ts @@ -2,6 +2,7 @@ export type InlineDirectiveParseResult = { text: string; audioAsVoice: boolean; replyToId?: string; + replyToExplicitId?: string; replyToCurrent: boolean; hasAudioTag: boolean; hasReplyTag: boolean; @@ -71,6 +72,7 @@ export function parseInlineDirectives( text: cleaned, audioAsVoice, replyToId, + replyToExplicitId: lastExplicitId, replyToCurrent: sawCurrent, hasAudioTag, hasReplyTag,