
# Post-Run Inspector
# Analyzes completed .prose runs for runtime fidelity and task effectiveness
#
# Usage:
#   prose run @openprose/lib/inspector
#
# Inputs:
#   run_path: Path to the run to inspect (e.g., .prose/runs/20260119-100000-abc123)
#   depth: light | deep
#   target: vm | task | all
#
# Compounding: Each inspection builds on prior inspections via a persistent index agent.
# The index agent uses `persist: user`, so inspection history spans all projects.

input run_path: "Path to the run to inspect (e.g., .prose/runs/20260119-100000-abc123)"
input depth: "Inspection depth: light or deep"
input target: "Evaluation target: vm, task, or all"

# ============================================================
# Agents
# ============================================================
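# Four roles split the pipeline: a persistent index that remembers past
# inspections, an extractor that reads run artifacts, an evaluator that
# scores them, and a synthesizer that formats the results.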
agent index:
  model: haiku
  persist: user
  prompt: """
    You maintain the inspection registry across all projects.
    Track: target_run_id, depth, target, timestamp, verdict.
    Return JSON when queried. Store compactly.
  """

agent extractor:
  model: sonnet
  prompt: """
    You extract structured data from .prose run artifacts.
    Read state.md, bindings/, and logs carefully.
    Return clean JSON.
  """

agent evaluator:
  model: opus
  prompt: """
    You evaluate .prose runs with intelligent judgment.
    Rate 1-10 with specific rationale. Be concrete.
  """

agent synthesizer:
  model: sonnet
  prompt: """
    You produce clear reports in requested formats.
  """

# ============================================================
# Phase 0: Check Prior Work
# ============================================================
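# Ask the persistent index whether this run has been inspected before, so a
# prior deep pass can be reused instead of re-extracted.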
let prior = resume: index
  prompt: """
    Any prior inspections for: {run_path}?
    Return JSON: { "inspections": [...], "has_light": bool, "has_deep": bool }
  """

# ============================================================
# Phase 1: Extraction
# ============================================================
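# Pull structured facts out of the run directory. Light runs collect only the
# basics; deep runs also read program source, execution summary, and binding
# previews unless a cached deep inspection already exists.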
let extraction = session: extractor
  prompt: """
    Extract from run at: {run_path}
    Depth: {depth}
    Prior work: {prior}

    ALWAYS get:
    - run_id (from path)
    - completed (did state.md show completion?)
    - error_count (failures in state.md)
    - binding_names (list all bindings/)
    - output_names (bindings with kind: output)

    IF depth=deep AND no prior deep inspection:
    - program_source (contents of program.prose)
    - execution_summary (key statements from state.md)
    - binding_previews (first 300 chars of each binding)

    IF prior deep exists, skip deep extraction and note "using cached".

    Return JSON.
  """
  context: prior

# ============================================================
# Phase 2: Evaluation
# ============================================================
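# Score the run against the requested target: vm checks runtime health,
# task checks whether the outputs serve the program's goal, and deep runs
# add fidelity and efficiency scores.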
let evaluation = session: evaluator
  prompt: """
    Evaluate this run.
    Target: {target}
    Depth: {depth}
    Data: {extraction}
    Prior findings: {prior}

    FOR vm (if target=vm or all):
    - completion (1-10): Clean finish?
    - binding_integrity (1-10): Expected outputs exist with content?
    - vm_verdict: pass/partial/fail
    - vm_notes: 1-2 sentences

    FOR task (if target=task or all):
    - output_substance (1-10): Outputs look real, not empty/error?
    - goal_alignment (1-10): Based on program name, does output fit?
    - task_verdict: pass/partial/fail
    - task_notes: 1-2 sentences

    IF depth=deep, add:
    - fidelity (1-10): Execution trace matches program structure?
    - efficiency (1-10): Reasonable number of steps for the job?

    Return JSON with all applicable fields.
  """
  context: extraction

# ============================================================
# Phase 3: Synthesis
# ============================================================
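# Three synthesizer sessions run concurrently: a machine-readable verdict,
# a mermaid diagram of the run, and a short markdown report.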
parallel:
  verdict = session: synthesizer
    prompt: """
      Machine-readable verdict as JSON:
      {
        "run_id": "...",
        "depth": "{depth}",
        "target": "{target}",
        "vm": { "verdict": "...", "scores": {...} },
        "task": { "verdict": "...", "scores": {...} },
        "flags": []
      }
      Data: {evaluation}
    """
    context: evaluation

  diagram = session: synthesizer
    prompt: """
      Simple mermaid flowchart of the run.
      Show: inputs -> key steps -> outputs.
      Use execution_summary if available, else infer from bindings.
      Output only the mermaid code.
      Data: {extraction}
    """
    context: extraction

  report = session: synthesizer
    prompt: """
      2-paragraph markdown summary:
      1. What was inspected, key metrics
      2. Findings and any recommendations
      Data: {extraction}, {evaluation}
    """
    context: { extraction, evaluation }

# ============================================================
# Phase 4: Register
# ============================================================
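# Write the verdict back into the persistent index so future inspections can
# build on this one (the compounding described in the header).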
resume: index
  prompt: """
    Register this inspection:
    run_path: {run_path}
    depth: {depth}
    target: {target}
    verdict: {verdict}

    Update your memory with this entry.
  """
  context: verdict

# ============================================================
# Output
# ============================================================
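# Bundle the three synthesis artifacts into the single `inspection` output.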
output inspection = session: synthesizer
  prompt: """
    Combine into final output structure:
    verdict_json: {verdict}
    mermaid: {diagram}
    summary: {report}

    Return as JSON with these three fields.
  """
  context: { verdict, diagram, report }
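
# Illustrative shape of the final binding (not guaranteed; the synthesizer
# decides the exact content):
#   { "verdict_json": {...}, "mermaid": "flowchart TD ...", "summary": "..." }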