# clawdbot/extensions/open-prose/skills/prose/lib/error-forensics.prose

# Error Forensics
# Deep investigation of failed or problematic runs
#
# Usage:
#   prose run @openprose/lib/error-forensics
#
# Inputs:
#   run_path: Path to the failed/problematic run
#   focus: Optional focus area (vm | program | context | external); auto-detected when omitted
#
# Outputs:
#   report: Markdown report covering
#     - Root cause analysis
#     - Error classification
#     - Fix recommendations
#     - Prevention suggestions

# Path of the run to investigate; interpolated as {run_path} into the
# evidence-gathering prompt.
input run_path: "Path to the run to investigate"
# Category hint; interpolated as {focus} into the classification prompt.
# The accepted values mirror the four categories the classifier agent uses.
input focus: "Optional focus: vm | program | context | external (default: auto-detect)"

# ============================================================
# Agents
# ============================================================

# Investigator agent (opus model). Used by the evidence, trace, and
# root_cause sessions. Its prompt directs it to follow the execution trace
# in state.md, inspect each binding, and separate symptoms from causes.
agent investigator:
  model: opus
  prompt: """
    You are a forensic investigator for failed .prose runs.

    You methodically trace execution to find root causes:
    - Read state.md for execution trace
    - Check each binding for errors or unexpected content
    - Look for patterns: where did things go wrong?
    - Distinguish symptoms from causes
  """

# Classifier agent (sonnet model). Used by the classification session.
# Its prompt defines the four error categories (VM, program, context,
# external) and lists three example subtypes for each; these categories
# correspond to the values accepted by the `focus` input.
agent classifier:
  model: sonnet
  prompt: """
    You classify errors into actionable categories:

    VM errors: The OpenProse VM itself misbehaved
    - State management bugs
    - Incorrect control flow
    - Context passing failures

    Program errors: The .prose program has issues
    - Logic errors
    - Missing error handling
    - Bad agent prompts

    Context errors: Context degradation or bloat
    - Information lost between agents
    - Context too large
    - Wrong context passed

    External errors: Outside factors
    - Tool failures
    - Network issues
    - Resource limits
  """

# Fixer agent (opus model). Used by the fixes session. Its prompt asks for
# concrete fixes that show the actual change rather than describing it.
agent fixer:
  model: opus
  prompt: """
    You propose specific fixes for identified issues.
    Be concrete: show the change, not just describe it.
  """

# ============================================================
# Phase 1: Gather Evidence
# ============================================================

# Phase 1: run the investigator against the run at {run_path} and bind its
# result to `evidence`. The prompt names four things to read (state.md,
# bindings/, program.prose, agents/) and four things to document. The
# `evidence` binding is referenced by the trace, classification, fixes, and
# report sessions.
let evidence = session: investigator
  prompt: """
    Gather evidence from the failed run.

    Run: {run_path}

    Read and analyze:
    1. state.md - What was the execution trace? Where did it stop?
    2. bindings/ - Which bindings exist? Any with errors or empty?
    3. program.prose - What was the program trying to do?
    4. agents/ - Any agent memory files with clues?

    Document:
    - Last successful step
    - First sign of trouble
    - Error messages (if any)
    - Unexpected states

    Return structured evidence.
  """

# ============================================================
# Phase 2: Trace Execution
# ============================================================

# Phase 2: run the investigator over the gathered evidence and bind the
# result to `trace`. The prompt asks for the point where expected and
# actual behavior diverge and requests an object with `failure_point`,
# `chain_of_events`, and `contributing_factors`.
# NOTE(review): the prompt refers to state.md, but this session receives
# only `evidence` (interpolated and via `context:`), not {run_path}.
# Presumably the evidence carries the relevant state.md content - confirm.
let trace = session: investigator
  prompt: """
    Trace execution step by step to find the failure point.

    Evidence: {evidence}

    Walk through the execution:
    1. What was the program supposed to do at each step?
    2. What actually happened (according to state.md)?
    3. Where do expected and actual diverge?

    For the divergence point:
    - What was the input to that step?
    - What was the output (or lack thereof)?
    - What should have happened?

    Return:
    {
      "failure_point": { step, statement, expected, actual },
      "chain_of_events": [...],
      "contributing_factors": [...]
    }
  """
  context: evidence

# ============================================================
# Phase 3: Classify Error
# ============================================================

# Phase 3: run the classifier over the trace and evidence and bind the
# result to `classification`. This is the only session that interpolates
# the `focus` input, as a hint. The requested object carries category,
# subcategory, severity, reproducibility, confidence, and reasoning.
let classification = session: classifier
  prompt: """
    Classify this error.

    Trace: {trace}
    Evidence: {evidence}
    Focus hint: {focus}

    Determine:
    - Primary category (vm | program | context | external)
    - Subcategory (specific type within category)
    - Severity (critical | major | minor)
    - Reproducibility (always | sometimes | rare)

    Return:
    {
      "category": "...",
      "subcategory": "...",
      "severity": "...",
      "reproducibility": "...",
      "confidence": "high" | "medium" | "low",
      "reasoning": "..."
    }
  """
  context: { trace, evidence }

# ============================================================
# Phase 4: Root Cause Analysis
# ============================================================

# Phase 4: run the investigator over the trace and classification and bind
# the result to `root_cause`. The prompt asks for repeated "why" questioning
# and defines the root cause as the earliest point where an intervention
# would have prevented the failure. The requested object carries
# `root_cause`, `causal_chain`, and `root_cause_category`.
let root_cause = session: investigator
  prompt: """
    Determine the root cause (not just symptoms).

    Trace: {trace}
    Classification: {classification}

    Ask "why" repeatedly until you reach the root:
    - Why did this step fail?
    - Why was that input malformed?
    - Why did that agent produce that output?
    - ...

    The root cause is the earliest point where an intervention
    would have prevented the failure.

    Return:
    {
      "root_cause": "...",
      "causal_chain": ["step 1", "led to step 2", "which caused failure"],
      "root_cause_category": "vm" | "program" | "context" | "external"
    }
  """
  context: { trace, classification }

# ============================================================
# Phase 5: Fix Recommendations
# ============================================================

# Phase 5: run the fixer over the root cause, classification, and evidence
# and bind the result to `fixes`. The "Provide" list names every key of the
# requested return object (immediate, permanent, detection, prevention) so
# the instructions and the schema agree; the report session relies on the
# prevention entry for its Recommendations section.
let fixes = session: fixer
  prompt: """
    Propose fixes for this failure.

    Root cause: {root_cause}
    Classification: {classification}
    Evidence: {evidence}

    Provide:
    1. Immediate fix (how to make this specific run work)
    2. Permanent fix (how to prevent this class of error)
    3. Detection (how to catch this earlier next time)
    4. Prevention (how to avoid this in future programs)

    Be specific. If it's a code change, show the diff.
    If it's a process change, describe the new process.

    Return:
    {
      "immediate": { action, details },
      "permanent": { action, details, files_to_change },
      "detection": { action, details },
      "prevention": "how to avoid this in future programs"
    }
  """
  context: { root_cause, classification, evidence }

# ============================================================
# Output
# ============================================================

# Program output: a session labelled "Format report" that combines all five
# earlier bindings (evidence, trace, classification, root_cause, fixes)
# into a five-section markdown report. No agent is named on this statement.
output report = session "Format report"
  prompt: """
    Format the forensic analysis as a report.

    Evidence: {evidence}
    Trace: {trace}
    Classification: {classification}
    Root cause: {root_cause}
    Fixes: {fixes}

    Structure:
    1. Executive Summary
       - What failed
       - Why it failed (root cause)
       - How to fix it

    2. Timeline
       - Execution trace with failure point highlighted

    3. Root Cause Analysis
       - Causal chain
       - Classification

    4. Recommendations
       - Immediate fix
       - Permanent fix
       - Prevention

    5. Technical Details
       - Evidence gathered
       - Files examined

    Format as markdown.
  """
  context: { evidence, trace, classification, root_cause, fixes }