From 1e2ab8bf1ed00521c987557fa292f45fa0921951 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 17 Jan 2026 19:48:26 +0000 Subject: [PATCH] fix: improve frontmatter parsing --- package.json | 1 + pnpm-lock.yaml | 3 + src/agents/skills/frontmatter.ts | 30 +------ src/hooks/frontmatter.ts | 104 +------------------------ src/markdown/frontmatter.test.ts | 44 +++++++++++ src/markdown/frontmatter.ts | 129 +++++++++++++++++++++++++++++++ 6 files changed, 184 insertions(+), 127 deletions(-) create mode 100644 src/markdown/frontmatter.test.ts create mode 100644 src/markdown/frontmatter.ts diff --git a/package.json b/package.json index ccc815b00..c65bb8f3b 100644 --- a/package.json +++ b/package.json @@ -179,6 +179,7 @@ "tslog": "^4.10.2", "undici": "^7.18.2", "ws": "^8.19.0", + "yaml": "^2.8.2", "zod": "^4.3.5" }, "optionalDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d902c35f8..6ec79cc3c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -148,6 +148,9 @@ importers: ws: specifier: ^8.19.0 version: 8.19.0 + yaml: + specifier: ^2.8.2 + version: 2.8.2 zod: specifier: ^4.3.5 version: 4.3.5 diff --git a/src/agents/skills/frontmatter.ts b/src/agents/skills/frontmatter.ts index b50620ea4..cb1b95d6e 100644 --- a/src/agents/skills/frontmatter.ts +++ b/src/agents/skills/frontmatter.ts @@ -1,5 +1,7 @@ +import JSON5 from "json5"; import type { Skill } from "@mariozechner/pi-coding-agent"; +import { parseFrontmatterBlock } from "../../markdown/frontmatter.js"; import type { ClawdbotSkillMetadata, ParsedSkillFrontmatter, @@ -8,32 +10,8 @@ import type { SkillInvocationPolicy, } from "./types.js"; -function stripQuotes(value: string): string { - if ( - (value.startsWith('"') && value.endsWith('"')) || - (value.startsWith("'") && value.endsWith("'")) - ) { - return value.slice(1, -1); - } - return value; -} - export function parseFrontmatter(content: string): ParsedSkillFrontmatter { - const frontmatter: ParsedSkillFrontmatter = {}; - const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); - if (!normalized.startsWith("---")) return frontmatter; - const endIndex = normalized.indexOf("\n---", 3); - if (endIndex === -1) return frontmatter; - const block = normalized.slice(4, endIndex); - for (const line of block.split("\n")) { - const match = line.match(/^([\w-]+):\s*(.*)$/); - if (!match) continue; - const key = match[1]; - const value = stripQuotes(match[2].trim()); - if (!key || !value) continue; - frontmatter[key] = value; - } - return frontmatter; + return parseFrontmatterBlock(content); } function normalizeStringList(input: unknown): string[] { @@ -99,7 +77,7 @@ export function resolveClawdbotMetadata( const raw = getFrontmatterValue(frontmatter, "metadata"); if (!raw) return undefined; try { - const parsed = JSON.parse(raw) as { clawdbot?: unknown }; + const parsed = JSON5.parse(raw) as { clawdbot?: unknown }; if (!parsed || typeof parsed !== "object") return undefined; const clawdbot = (parsed as { clawdbot?: unknown }).clawdbot; if (!clawdbot || typeof clawdbot !== "object") return undefined; diff --git a/src/hooks/frontmatter.ts b/src/hooks/frontmatter.ts index 1c6525432..d137812c5 100644 --- a/src/hooks/frontmatter.ts +++ b/src/hooks/frontmatter.ts @@ -1,4 +1,6 @@ import JSON5 from "json5"; + +import { parseFrontmatterBlock } from "../markdown/frontmatter.js"; import type { ClawdbotHookMetadata, HookEntry, @@ -7,107 +9,8 @@ import type { ParsedHookFrontmatter, } from "./types.js"; -function stripQuotes(value: string): string { - if ( - (value.startsWith('"') && value.endsWith('"')) || - (value.startsWith("'") && value.endsWith("'")) - ) { - return value.slice(1, -1); - } - return value; -} - -/** - * Extract a multi-line block value from frontmatter lines. - * Handles indented continuation lines (YAML-style multi-line values). - * - * @param lines - All lines in the frontmatter block - * @param startIndex - Index of the line containing the key - * @returns The combined multi-line value and the number of lines consumed - */ -function extractMultiLineValue( - lines: string[], - startIndex: number, -): { value: string; linesConsumed: number } { - const startLine = lines[startIndex]; - const match = startLine.match(/^([\w-]+):\s*(.*)$/); - if (!match) return { value: "", linesConsumed: 1 }; - - const inlineValue = match[2].trim(); - - // If there's a value on the same line, return it (single-line case) - if (inlineValue) { - return { value: inlineValue, linesConsumed: 1 }; - } - - // Multi-line case: collect indented continuation lines - const valueLines: string[] = []; - let i = startIndex + 1; - - while (i < lines.length) { - const line = lines[i]; - // Stop if we hit a non-indented line (new key or empty line without indent) - if (line.length > 0 && !line.startsWith(" ") && !line.startsWith("\t")) { - break; - } - valueLines.push(line); - i++; - } - - // Join and trim the multi-line value - const combined = valueLines.join("\n").trim(); - return { value: combined, linesConsumed: i - startIndex }; -} - export function parseFrontmatter(content: string): ParsedHookFrontmatter { - const frontmatter: ParsedHookFrontmatter = {}; - const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); - if (!normalized.startsWith("---")) return frontmatter; - const endIndex = normalized.indexOf("\n---", 3); - if (endIndex === -1) return frontmatter; - const block = normalized.slice(4, endIndex); - const lines = block.split("\n"); - - let i = 0; - while (i < lines.length) { - const line = lines[i]; - const match = line.match(/^([\w-]+):\s*(.*)$/); - if (!match) { - i++; - continue; - } - - const key = match[1]; - const inlineValue = match[2].trim(); - - if (!key) { - i++; - continue; - } - - // Check if this is a multi-line value (no inline value and next line is indented) - if (!inlineValue && i + 1 < lines.length) { - const nextLine = lines[i + 1]; - if (nextLine.startsWith(" ") || nextLine.startsWith("\t")) { - // Multi-line value - const { value, linesConsumed } = extractMultiLineValue(lines, i); - if (value) { - frontmatter[key] = value; - } - i += linesConsumed; - continue; - } - } - - // Single-line value - const value = stripQuotes(inlineValue); - if (value) { - frontmatter[key] = value; - } - i++; - } - - return frontmatter; + return parseFrontmatterBlock(content); } function normalizeStringList(input: unknown): string[] { @@ -172,7 +75,6 @@ export function resolveClawdbotMetadata( const raw = getFrontmatterValue(frontmatter, "metadata"); if (!raw) return undefined; try { - // Use JSON5 to handle trailing commas and other relaxed JSON syntax const parsed = JSON5.parse(raw) as { clawdbot?: unknown }; if (!parsed || typeof parsed !== "object") return undefined; const clawdbot = (parsed as { clawdbot?: unknown }).clawdbot; diff --git a/src/markdown/frontmatter.test.ts b/src/markdown/frontmatter.test.ts new file mode 100644 index 000000000..2a6323f11 --- /dev/null +++ b/src/markdown/frontmatter.test.ts @@ -0,0 +1,44 @@ +import JSON5 from "json5"; +import { describe, expect, it } from "vitest"; + +import { parseFrontmatterBlock } from "./frontmatter.js"; + +describe("parseFrontmatterBlock", () => { + it("parses YAML block scalars", () => { + const content = `--- +name: yaml-hook +description: | + line one + line two +--- +`; + const result = parseFrontmatterBlock(content); + expect(result.name).toBe("yaml-hook"); + expect(result.description).toBe("line one\nline two"); + }); + + it("handles JSON5-style multi-line metadata", () => { + const content = `--- +name: session-memory +metadata: + { + "clawdbot": + { + "emoji": "disk", + "events": ["command:new"], + }, + } +--- +`; + const result = parseFrontmatterBlock(content); + expect(result.metadata).toBeDefined(); + + const parsed = JSON5.parse(result.metadata ?? "") as { clawdbot?: { emoji?: string } }; + expect(parsed.clawdbot?.emoji).toBe("disk"); + }); + + it("returns empty when frontmatter is missing", () => { + const content = "# No frontmatter"; + expect(parseFrontmatterBlock(content)).toEqual({}); + }); +}); diff --git a/src/markdown/frontmatter.ts b/src/markdown/frontmatter.ts new file mode 100644 index 000000000..7d74067e7 --- /dev/null +++ b/src/markdown/frontmatter.ts @@ -0,0 +1,129 @@ +import YAML from "yaml"; + +export type ParsedFrontmatter = Record; + +function stripQuotes(value: string): string { + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + return value.slice(1, -1); + } + return value; +} + +function coerceFrontmatterValue(value: unknown): string | undefined { + if (value === null || value === undefined) return undefined; + if (typeof value === "string") return value.trim(); + if (typeof value === "number" || typeof value === "boolean") return String(value); + if (typeof value === "object") { + try { + return JSON.stringify(value); + } catch { + return undefined; + } + } + return undefined; +} + +function parseYamlFrontmatter(block: string): ParsedFrontmatter | null { + try { + const parsed = YAML.parse(block) as unknown; + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null; + const result: ParsedFrontmatter = {}; + for (const [rawKey, value] of Object.entries(parsed as Record)) { + const key = rawKey.trim(); + if (!key) continue; + const coerced = coerceFrontmatterValue(value); + if (coerced === undefined) continue; + result[key] = coerced; + } + return result; + } catch { + return null; + } +} + +function extractMultiLineValue( + lines: string[], + startIndex: number, +): { value: string; linesConsumed: number } { + const startLine = lines[startIndex]; + const match = startLine.match(/^([\w-]+):\s*(.*)$/); + if (!match) return { value: "", linesConsumed: 1 }; + + const inlineValue = match[2].trim(); + if (inlineValue) { + return { value: inlineValue, linesConsumed: 1 }; + } + + const valueLines: string[] = []; + let i = startIndex + 1; + + while (i < lines.length) { + const line = lines[i]; + if (line.length > 0 && !line.startsWith(" ") && !line.startsWith("\t")) { + break; + } + valueLines.push(line); + i++; + } + + const combined = valueLines.join("\n").trim(); + return { value: combined, linesConsumed: i - startIndex }; +} + +function parseLineFrontmatter(block: string): ParsedFrontmatter { + const frontmatter: ParsedFrontmatter = {}; + const lines = block.split("\n"); + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + const match = line.match(/^([\w-]+):\s*(.*)$/); + if (!match) { + i++; + continue; + } + + const key = match[1]; + const inlineValue = match[2].trim(); + + if (!key) { + i++; + continue; + } + + if (!inlineValue && i + 1 < lines.length) { + const nextLine = lines[i + 1]; + if (nextLine.startsWith(" ") || nextLine.startsWith("\t")) { + const { value, linesConsumed } = extractMultiLineValue(lines, i); + if (value) { + frontmatter[key] = value; + } + i += linesConsumed; + continue; + } + } + + const value = stripQuotes(inlineValue); + if (value) { + frontmatter[key] = value; + } + i++; + } + + return frontmatter; +} + +export function parseFrontmatterBlock(content: string): ParsedFrontmatter { + const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); + if (!normalized.startsWith("---")) return {}; + const endIndex = normalized.indexOf("\n---", 3); + if (endIndex === -1) return {}; + const block = normalized.slice(4, endIndex); + + const yamlParsed = parseYamlFrontmatter(block); + if (yamlParsed !== null) return yamlParsed; + return parseLineFrontmatter(block); +}