# Profiler
# Analyzes OpenProse runs for cost, tokens, and time using actual API data
#
# Usage:
# prose run @openprose/lib/profiler
#
# Inputs:
# run_path: Path to run to analyze, or "recent" for latest runs
# scope: single | compare | trend
#
# Outputs:
# - Cost breakdown (VM vs subagents, by agent, by model)
# - Time breakdown (wall-clock, per-session, parallelism effectiveness)
# - Token usage patterns
# - Efficiency metrics ($/second, tokens/second)
# - Bottleneck identification
# - Optimization recommendations
#
# Data Sources:
# Primary: Claude Code's jsonl files in ~/.claude/projects/{project}/{session}/
# - Main session: {session}.jsonl (VM orchestration)
# - Subagents: subagents/agent-*.jsonl (OpenProse sessions)
#
# From each assistant message:
# - Tokens: input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens
# - Model: message.model
# - Timestamps: for duration calculations
#
# Pricing: Fetched live from Anthropic's pricing page
#
# Supported Tools:
# - Claude Code (~/.claude) - full support
# - OpenCode, Amp, Codex - may have different structures; the profiler will warn
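#
# Illustrative shape of one assistant record (a sketch based on the fields
# listed above; exact nesting and extra fields vary by tool version):
#   {"type": "assistant",
#    "timestamp": "2025-06-01T12:00:00Z",
#    "message": {"model": "claude-sonnet-4-20250514"},
#    "usage": {"input_tokens": 1200, "output_tokens": 350,
#              "cache_creation_input_tokens": 0, "cache_read_input_tokens": 8000}}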

input run_path: "Path to run, or 'recent' for latest runs in .prose/runs/"
input scope: "Scope: single (one run) | compare (multiple runs) | trend (over time)"

const PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing#model-pricing"

# ============================================================
# Agents
# ============================================================

agent detector:
model: haiku
prompt: """
You detect which AI coding tool was used and find its data files.

Check for:
1. ~/.claude/projects/ - Claude Code (full support)
2. ~/.opencode/ - OpenCode (may differ)
3. ~/.amp/ - Amp (may differ)
4. ~/.codex/ - Codex (may differ)

If not Claude Code, warn the user that analysis may be incomplete.
"""

agent collector:
model: sonnet
prompt: """
You locate and inventory AI coding tool session files.

For Claude Code (~/.claude/projects/{project}/{session}/):
1. Main session file: {session}.jsonl - VM orchestration
2. Subagent files: subagents/agent-*.jsonl - OpenProse sessions

Your job is to FIND the files, not process them.
Return file paths for the calculator agent to process.
"""

agent calculator:
model: sonnet
prompt: """
You calculate metrics by writing and executing inline Python scripts.

CRITICAL RULES:
1. NEVER do math in your head - always use Python
2. NEVER create standalone .py files - use inline scripts only
3. Run scripts with heredoc style: python3 << 'EOF' ... EOF
4. MUST process ALL files: main_jsonl AND EVERY file in subagent_jsonls[]

BEFORE CALCULATING:
Fetch current pricing from the pricing URL provided in your prompt.
Extract per-million-token rates for each Claude model.

YOUR PYTHON SCRIPT MUST:
1. Process the main_jsonl file (VM orchestration data)
2. Process EVERY file in subagent_jsonls[] (subagent session data)
- This is critical! There may be 10-20+ subagent files
- Each contains token usage that MUST be counted
3. For each file, read line by line and extract from type="assistant":
- usage.input_tokens, usage.output_tokens
- usage.cache_creation_input_tokens, usage.cache_read_input_tokens
- message.model (for pricing tier)
- timestamp (for duration calculation)
4. From Task prompts in subagent files, extract:
- Agent name: regex `You are the "([^"]+)" agent`
- Binding name: regex `/bindings/([^.]+)\\.md`
5. Calculate costs using the pricing you fetched
6. Calculate durations from first to last timestamp per file
7. Output structured JSON with VM and subagent data SEPARATELY

VALIDATION: If subagents.total.cost is 0 but subagent_jsonls has files,
your script has a bug - fix it before outputting.
"""
permissions:
network: [PRICING_URL]
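
# For reference, a minimal sketch of the kind of inline heredoc script the
# calculator prompt above asks for. File paths, the pricing table, and field
# nesting are placeholders here - at run time the agent substitutes the fetched
# rates and the actual file list, and adds the per-agent/per-binding breakdowns
# and cache pricing that this sketch omits for brevity.
#
#   python3 << 'EOF'
#   import json
#   from datetime import datetime
#
#   files = ["/path/to/session.jsonl", "/path/to/subagents/agent-1.jsonl"]  # placeholders
#   rates = {"sonnet": {"in": 3.00, "out": 15.00}}  # $/MTok, example values only
#
#   totals = {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0}
#   cost, first, last = 0.0, None, None
#
#   for path in files:
#       with open(path) as f:
#           for line in f:
#               rec = json.loads(line)
#               if rec.get("type") != "assistant":
#                   continue
#               usage = rec.get("usage") or rec.get("message", {}).get("usage", {})
#               totals["input"] += usage.get("input_tokens", 0)
#               totals["output"] += usage.get("output_tokens", 0)
#               totals["cache_write"] += usage.get("cache_creation_input_tokens", 0)
#               totals["cache_read"] += usage.get("cache_read_input_tokens", 0)
#               model = rec.get("message", {}).get("model", "")
#               tier = next((t for t in rates if t in model), None)
#               if tier:
#                   cost += usage.get("input_tokens", 0) / 1e6 * rates[tier]["in"]
#                   cost += usage.get("output_tokens", 0) / 1e6 * rates[tier]["out"]
#               ts = datetime.fromisoformat(rec["timestamp"].replace("Z", "+00:00"))
#               first = ts if first is None or ts < first else first
#               last = ts if last is None or ts > last else last
#
#   duration = (last - first).total_seconds() if first and last else 0
#   print(json.dumps({"tokens": totals, "cost": round(cost, 4),
#                     "duration_seconds": duration}))
#   EOF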

agent analyzer:
model: opus
prompt: """
You analyze profiling data and identify optimization opportunities.

You receive pre-calculated data (computed by Python, not estimated).
Your job is interpretation and recommendations, not calculation.

COST ANALYSIS:
- VM overhead vs subagent costs (percentage split)
- Per-agent costs (which agents are most expensive?)
- Per-binding costs (which outputs cost the most?)
- Model tier usage (is opus used where sonnet would suffice?)
- Cache efficiency (cache_read vs cache_write ratio)

TIME ANALYSIS:
- Wall-clock duration vs sum of session durations
- Parallelism effectiveness (ratio shows how much parallelization helped)
- Per-agent time (which agents are slowest?)
- Bottlenecks (sequential operations that blocked progress)

EFFICIENCY ANALYSIS:
- Cost per second ($/s)
- Tokens per second (throughput)
- Cost vs time correlation (expensive but fast? cheap but slow?)

RECOMMENDATIONS:
- Model tier downgrades where appropriate
- Parallelization opportunities (sequential ops that could be parallel)
- Batching opportunities (many small sessions that could be consolidated)
- Context trimming if input tokens seem excessive
"""

agent tracker:
model: haiku
persist: user
prompt: """
You track profiling metrics across runs for trend analysis.
Store: run_id, program, timestamp, total_cost, total_time, vm_cost, subagent_cost, by_model.
Compare against historical data when available.
"""

# ============================================================
# Phase 1: Detect Tool and Find Data
# ============================================================

let tool_detection = session: detector
prompt: """
Detect which AI coding tool was used for this OpenProse run.

Run path: {run_path}

1. If run_path is in .prose/runs/, extract the run timestamp
2. Look for corresponding session in:
- ~/.claude/projects/ (Claude Code) - check subfolders for sessions
- Other tool directories as fallback

3. If found in ~/.claude:
- Return the full session path
- List the main jsonl file and subagent files
- This is the primary data source

4. If NOT found in ~/.claude:
- Check for opencode/amp/codex directories
- WARN: "Non-Claude Code tool detected. Token data structure may differ."

5. If no tool data found:
- Return tool="not-found" with clear error
- Do NOT attempt estimation

Return JSON:
{
  "tool": "claude-code" | "opencode" | "amp" | "codex" | "not-found",
  "session_path": "/path/to/session/" | null,
  "main_jsonl": "/path/to/session.jsonl" | null,
  "subagent_jsonls": [...] | [],
  "error": null | "Error message",
  "warnings": []
}
"""

# ============================================================
# Guard: Exit if no data available
# ============================================================

assert tool_detection.tool != "not-found":
"""
ERROR: Profiling requires actual data from AI tool session files.

Could not find session data for this run. This can happen if:
1. The run was not executed with Claude Code (or supported tool)
2. The Claude Code session has been deleted or moved
3. The run path does not correspond to an existing session

Supported tools: Claude Code (~/.claude)
Partial support: OpenCode, Amp, Codex (structure may differ)
"""

# ============================================================
# Phase 2: Locate Session Files
# ============================================================

let runs_to_analyze = session: collector
prompt: """
Find runs to analyze and locate their session files.

Input: {run_path}
Scope: {scope}
Tool detection: {tool_detection}

If run_path is a specific path, use that run.
If run_path is "recent", find the latest 5-10 runs in .prose/runs/

For each run, locate:
1. The .prose/runs/{run_id}/ directory
2. The corresponding Claude Code session
3. List all jsonl files (main session + subagents/)

Return JSON array:
[
  {
    "run_id": "...",
    "prose_run_path": "/path/to/.prose/runs/xxx/",
    "session_path": "/path/to/claude/session/",
    "main_jsonl": "/path/to/session.jsonl",
    "subagent_jsonls": [...]
  }
]
"""
context: tool_detection

# ============================================================
# Phase 3: Calculate Metrics (single Python pass per run)
# ============================================================

let metrics = runs_to_analyze | pmap:
session: calculator
prompt: """
Calculate all metrics for: {item}

STEP 1: Fetch current pricing from {PRICING_URL}
Note the per-million-token rates for each model (input, output, cache).

STEP 2: Write and execute an inline Python script that processes:
- Main jsonl: {item.main_jsonl}
- Subagent jsonls: {item.subagent_jsonls}

EXTRACT FROM EACH ASSISTANT MESSAGE:
- usage.input_tokens, usage.output_tokens
- usage.cache_creation_input_tokens, usage.cache_read_input_tokens
- model (for pricing tier)
- timestamp (for duration calculation)

EXTRACT FROM TASK PROMPTS (user messages in subagent files):
- Agent name: regex `You are the "([^"]+)" agent`
- Binding name: regex `/bindings/([^.]+)\.md`

CALCULATE:
- Cost: tokens * pricing rates you fetched
- Duration: time between first and last message per session
- Wall-clock: total run duration

OUTPUT JSON:
{
  "run_id": "...",
  "program": "...",
  "wall_clock_seconds": N,
  "vm_orchestration": {
    "tokens": { "input": N, "output": N, "cache_write": N, "cache_read": N },
    "cost": 0.00,
    "duration_seconds": N,
    "model": "...",
    "message_count": N
  },
  "subagents": {
    "total": { "tokens": {...}, "cost": 0.00, "duration_seconds": N },
    "by_agent": {
      "agent_name": {
        "tokens": {...},
        "cost": 0.00,
        "duration_seconds": N,
        "sessions": N,
        "model": "..."
      }
    },
    "by_binding": {
      "binding_name": { "tokens": {...}, "cost": 0.00, "duration_seconds": N, "agent": "..." }
    }
  },
  "by_model": {
    "opus": { "tokens": {...}, "cost": 0.00 },
    "sonnet": { "tokens": {...}, "cost": 0.00 },
    "haiku": { "tokens": {...}, "cost": 0.00 }
  },
  "total": {
    "tokens": { "input": N, "output": N, "cache_write": N, "cache_read": N, "total": N },
    "cost": 0.00,
    "duration_seconds": N
  },
  "efficiency": {
    "cost_per_second": 0.00,
    "tokens_per_second": N,
    "parallelism_factor": N  // sum(session_durations) / wall_clock
  }
}
"""
context: item
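
# Example of the parallelism_factor calculation above (illustrative numbers):
# four subagent sessions of 120s each give sum(session_durations) = 480s; with
# a 150s wall clock, parallelism_factor = 480 / 150 = 3.2, i.e. the run did
# roughly 3x more session-seconds of work than elapsed time.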

# ============================================================
# Phase 4: Analyze
# ============================================================

let analysis = session: analyzer
prompt: """
Analyze the profiling data.

Pre-calculated metrics: {metrics}
Scope: {scope}

All numbers were calculated by Python. Trust them - focus on insights.

FOR SINGLE RUN:

1. COST ATTRIBUTION
- VM overhead vs subagent costs (percentage)
- Rank agents by cost
- Flag expensive models on simple tasks

2. TIME ATTRIBUTION
- Wall-clock vs sum of session durations
- Parallelism factor interpretation:
- Factor near 1.0 = fully sequential
- Factor > 2.0 = good parallelization
- Factor > 5.0 = excellent parallelization
- Identify slowest agents/bindings

3. EFFICIENCY
- Cost per second (is expensive time well-spent?)
- Tokens per second (throughput)
- Correlation: expensive-and-fast vs cheap-and-slow

4. CACHE EFFICIENCY
- Read/write ratio
- Assessment: good (>5:1), fair (2-5:1), poor (<2:1)

5. HOTSPOTS
- Top 5 by cost
- Top 5 by time
- Note any that appear in both lists

6. RECOMMENDATIONS
- Model downgrades (specific: "agent X could use sonnet")
- Parallelization opportunities (specific sequential ops)
- Batching opportunities (many small similar sessions)
- Context trimming if input >> output

FOR COMPARE (multiple runs):
- Show cost and time differences
- Identify what changed between runs
- Note improvements or regressions

FOR TREND (over time):
- Show cost and time progression
- Identify trend direction
- Flag anomalies

Return structured JSON with all analysis sections.
"""
context: metrics
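
# Example of the cache read/write assessment (illustrative numbers): 2,000,000
# cache_read tokens against 250,000 cache_creation tokens is an 8:1 ratio,
# which the thresholds above classify as good.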

# ============================================================
# Phase 5: Track for Trends
# ============================================================

resume: tracker
prompt: """
Record this profiling data for trend tracking.

Run: {metrics[0].run_id}
Program: {metrics[0].program}
Total cost: {analysis.summary.total_cost}
Total time: {analysis.summary.total_time}
Efficiency: {analysis.summary.efficiency}

Add to your historical record with timestamp.
If you have previous runs of the same program, note the trend.
"""
context: analysis

# ============================================================
# Output
# ============================================================

output report = session "Format profiler report"
prompt: """
Format the profiling analysis as a professional report.

Analysis: {analysis}
Tool: {tool_detection.tool}

## Report Structure:

### 1. Executive Summary
- Total cost and wall-clock time
- Key finding (most significant insight)
- Tool used

### 2. Cost Attribution
| Category | Cost | % of Total |
|----------|------|------------|
| VM Orchestration | $X.XX | XX% |
| Subagent Execution | $X.XX | XX% |
| **Total** | $X.XX | 100% |

### 3. Time Attribution
| Category | Time | % of Wall-Clock |
|----------|------|-----------------|
| VM Orchestration | Xs | XX% |
| Subagent Execution | Xs | XX% |
| **Wall-Clock** | Xs | - |
| **Sum of Sessions** | Xs | - |
| **Parallelism Factor** | X.Xx | - |

### 4. By Agent
| Agent | Model | Sessions | Cost | Time | $/s |
|-------|-------|----------|------|------|-----|

### 5. By Model Tier
| Model | Cost | % | Tokens | % |
|-------|------|---|--------|---|

### 6. Cache Efficiency
- Read/write ratio and assessment

### 7. Hotspots
**By Cost:**
1. ...

**By Time:**
1. ...

### 8. Efficiency Analysis
- Cost per second
- Tokens per second
- Parallelism effectiveness

### 9. Recommendations
Prioritized list with estimated impact

Format as clean markdown with tables.
"""
context: analysis