#!/usr/bin/env node
/**
 * The Builders Podcast Generator v2
 *
 * Two modes:
 *   --quick : 15-20 min episode, single research pass
 *   --deep  : 45-60 min episode, multi-layer research + enhancement
 *
 * Pipeline: research -> hook -> per-section dialogue (checkpointed) -> TTS -> ffmpeg merge.
 * Dialogue turns are wrapped in <Person1>…</Person1> (Maya) and
 * <Person2>…</Person2> (James) tags; the TTS stage parses those tags.
 */

import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync, rmSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { execSync, execFileSync } from 'child_process';

const __dirname = dirname(fileURLToPath(import.meta.url));
const SKILL_DIR = join(__dirname, '..');
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

// Root of the sibling skills invoked as subprocesses. Overridable so the
// script is not tied to a single machine's checkout location.
const SKILLS_ROOT = process.env.CLAWDIS_SKILLS_DIR ?? '/home/elie/github/clawdis/skills';

const CHAT_COMPLETIONS_URL = 'https://api.openai.com/v1/chat/completions';
const SPEECH_URL = 'https://api.openai.com/v1/audio/speech';
const DEFAULT_MODEL = 'gpt-5.2';
const TTS_BATCH_SIZE = 15;
const BOX_INNER_WIDTH = 60;

// Load the Acquired Bible
const BIBLE = readFileSync(join(SKILL_DIR, 'templates/acquired-bible.md'), 'utf-8');

// Section definitions - QUICK mode
const SECTIONS_QUICK = [
  { id: 0, name: 'INTRO', turns: 8, description: 'Welcome, casual banter, introduce the company, tease why this story is fascinating' },
  { id: 1, name: 'HOOK', turns: 12, description: 'Open with surprising fact or tension. Central question. Stakes.' },
  { id: 2, name: 'ORIGIN', turns: 20, description: 'Founders as humans. Genesis moment. Early bet. Market context.' },
  { id: 3, name: 'INFLECTION_1', turns: 18, description: 'First major decision. Alternatives. Stakes. What they risked.' },
  { id: 4, name: 'INFLECTION_2', turns: 18, description: 'Second pivot. New challenge. How they adapted.' },
  { id: 5, name: 'MESSY_MIDDLE', turns: 14, description: 'Near-death. Internal conflicts. Real struggle, not glorified.' },
  { id: 6, name: 'NOW', turns: 12, description: 'Current state. Metrics. Competition. Open questions.' },
  { id: 7, name: 'TAKEAWAYS', turns: 10, description: 'Key lessons. Frameworks. Final thought tying back to hook.' },
];

// Section definitions - DEEP mode (more turns, more sections)
const SECTIONS_DEEP = [
  { id: 0, name: 'INTRO', turns: 10, description: 'Welcome, what excited them about this research, why this company matters now' },
  { id: 1, name: 'HOOK', turns: 15, description: 'Vivid opening scene. The moment that defines the company. Stakes made visceral.' },
  { id: 2, name: 'ORIGIN', turns: 35, description: 'Deep founder story - actual quotes, specific moments, human struggles. Market context of the era. What the world looked like.' },
  { id: 3, name: 'EARLY_PRODUCT', turns: 25, description: 'First product/service. How it actually worked mechanically. Early customers. The insight that made it work.' },
  { id: 4, name: 'INFLECTION_1', turns: 25, description: 'First major decision. Board meeting details. What alternatives existed. Quotes from people who were there.' },
  { id: 5, name: 'SCALE', turns: 25, description: 'How they scaled. Operational details. What broke. How they fixed it. Specific numbers.' },
  { id: 6, name: 'INFLECTION_2', turns: 25, description: 'Second pivot or crisis. The moment it almost fell apart. How they survived.' },
  { id: 7, name: 'COMPETITION', turns: 20, description: 'Competitive landscape. Who they beat and how. Who almost beat them.' },
  { id: 8, name: 'CULTURE', turns: 20, description: 'Internal culture. Leadership philosophy. Quotes from employees. What makes them different.' },
  { id: 9, name: 'NOW', turns: 20, description: 'Current state with specific metrics. Recent moves. Open questions. What could go wrong.' },
  { id: 10, name: 'TAKEAWAYS', turns: 15, description: 'Key frameworks. What other companies can learn. Final thought tying back to hook.' },
];

const VOICES = {
  Person1: 'alloy', // Maya
  Person2: 'echo', // James
};

/** Print a message prefixed with the current UTC time (HH:MM:SS). */
const log = (msg) => console.log(`[${new Date().toISOString().slice(11, 19)}] ${msg}`);

/** Count opening speaker tags (<Person1> / <Person2>) in a generated section. */
const countTurns = (text) => (text.match(/<Person[12]>/g) || []).length;

/**
 * Render a box-drawing banner.
 * @param {string[][]} groups - Groups of rows; groups are separated by a horizontal rule.
 * @returns {string} Multi-line banner with leading and trailing newline.
 */
function renderBox(groups) {
  const bar = '═'.repeat(BOX_INNER_WIDTH);
  const body = groups
    .map((rows) => rows.map((row) => `║${row.padEnd(BOX_INNER_WIDTH)}║`).join('\n'))
    .join(`\n╠${bar}╣\n`);
  return `\n╔${bar}╗\n${body}\n╚${bar}╝\n`;
}

/** Left-pad `text` so it appears centered inside the banner. */
function centered(text) {
  const left = Math.max(Math.floor((BOX_INNER_WIDTH - text.length) / 2), 0);
  return ' '.repeat(left) + text;
}

// ============ LLM CALLS ============

/**
 * POST a chat-completions request and return the first choice's text.
 * @param {object} body - JSON request body (model, messages, ...).
 * @param {string} label - Prefix used in error messages.
 * @returns {Promise<string>} The completion text.
 * @throws {Error} On a non-2xx response or when the completion has no text.
 */
async function chatCompletion(body, label) {
  const response = await fetch(CHAT_COMPLETIONS_URL, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${OPENAI_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`${label} error: ${response.status} - ${error}`);
  }

  const data = await response.json();
  const choice = data.choices?.[0];
  const content = choice?.message?.content;
  // Fail here with a clear message rather than later with a TypeError
  // when a caller invokes .match()/.slice() on a missing completion.
  if (typeof content !== 'string' || content.length === 0) {
    throw new Error(`${label} error: empty completion (finish_reason: ${choice?.finish_reason ?? 'unknown'})`);
  }
  return content;
}

/**
 * Single-turn chat completion.
 * @param {string} prompt - User prompt.
 * @param {string} [model] - Model id.
 * @param {number} [maxTokens] - Sent as max_completion_tokens.
 * @returns {Promise<string>} Completion text.
 */
async function llm(prompt, model = DEFAULT_MODEL, maxTokens = 8000) {
  return chatCompletion(
    {
      model,
      messages: [{ role: 'user', content: prompt }],
      max_completion_tokens: maxTokens,
      temperature: 0.8,
    },
    'API',
  );
}

/**
 * Single-turn completion with the o3 model, used for reasoning-heavy tasks.
 * No temperature is sent for this model.
 * @param {string} prompt - User prompt.
 * @returns {Promise<string>} Completion text.
 */
async function llmO3(prompt) {
  return chatCompletion(
    {
      model: 'o3',
      messages: [{ role: 'user', content: prompt }],
      max_completion_tokens: 16000,
    },
    'o3 API',
  );
}

// ============ RESEARCH ============

/**
 * Run the deepresearch skill for a topic and return its stdout.
 * The topic is passed as an argv entry (no shell), so quotes or `$()` in
 * the topic cannot break out into the command line.
 * @param {string} topic
 * @returns {Promise<string>} Research text.
 * @throws If the subprocess fails or exceeds the 10-minute timeout.
 */
async function runGeminiResearch(topic) {
  log('Running Gemini Deep Research...');
  const start = Date.now();

  const skillDir = join(SKILLS_ROOT, 'deepresearch');
  const result = execFileSync(join(skillDir, 'scripts/deepresearch.sh'), [topic], {
    cwd: skillDir,
    encoding: 'utf-8',
    maxBuffer: 50 * 1024 * 1024,
    timeout: 600000,
  });

  const elapsed = ((Date.now() - start) / 1000).toFixed(1);
  log(`Gemini research complete in ${elapsed}s`);
  return result;
}

/**
 * Run a Node-based search script best-effort: failures are logged and yield ''.
 * @param {string} label - Name used in the warning message.
 * @param {string} scriptPath - Absolute path of the script.
 * @param {string[]} scriptArgs - Arguments passed verbatim (no shell).
 * @param {number} timeout - Timeout in milliseconds.
 * @returns {string} Script stdout, or '' on failure.
 */
function runSearchScript(label, scriptPath, scriptArgs, timeout) {
  try {
    return execFileSync('node', [scriptPath, ...scriptArgs], {
      encoding: 'utf-8',
      maxBuffer: 10 * 1024 * 1024,
      timeout,
      stdio: ['pipe', 'pipe', 'ignore'], // discard the script's stderr
    });
  } catch (err) {
    // Research layers are optional; keep going but leave a trace.
    log(`  ⚠️  ${label} failed: ${String(err.message).split('\n')[0]}`);
    return '';
  }
}

/**
 * Brave web search with page content.
 * @param {string} query - Search query; may contain double quotes for exact phrases.
 * @param {number} [count] - Number of results.
 * @returns {Promise<string>} Search output, or '' on failure.
 */
async function runBraveSearch(query, count = 5) {
  return runSearchScript(
    'Brave search',
    join(SKILLS_ROOT, 'brave-search/scripts/search.mjs'),
    [query, '-n', String(count), '--content'],
    60000,
  );
}

/**
 * Exa research query.
 * @param {string} query
 * @returns {Promise<string>} Research output, or '' on failure.
 */
async function runExaSearch(query) {
  return runSearchScript(
    'Exa search',
    join(SKILLS_ROOT, 'researcher/scripts/research.mjs'),
    [query],
    120000,
  );
}

// Layers 2-5 of the deep research pass; layer 1 is the Gemini overview.
const SEARCH_LAYERS = [
  {
    key: 'founders',
    file: 'research-2-founders.md',
    label: 'Founder interviews',
    count: 8,
    query: (company) => `"${company}" founder interview quotes early days story`,
  },
  {
    key: 'decisions',
    file: 'research-3-decisions.md',
    label: 'Contemporary coverage',
    count: 8,
    query: (company) => `"${company}" site:wsj.com OR site:nytimes.com OR site:forbes.com history`,
  },
  {
    key: 'financials',
    file: 'research-4-financials.md',
    label: 'Financial details',
    count: 5,
    query: (company) => `"${company}" revenue profit margin business model analysis`,
  },
  {
    key: 'contrarian',
    file: 'research-5-contrarian.md',
    label: 'Struggles and criticism',
    count: 5,
    query: (company) => `"${company}" almost failed crisis problems criticism`,
  },
];

/**
 * Run all five research layers, writing each to its own file in `outputDir`
 * plus a combined `research-combined.md`.
 * @param {string} topic - Full topic string (used for the overview layer).
 * @param {string} company - Company name (used for the search layers).
 * @param {string} outputDir - Existing output directory.
 * @returns {Promise<string>} The combined research markdown.
 */
async function multiLayerResearch(topic, company, outputDir) {
  log('Starting 5-layer deep research...');
  const research = {};

  // Layer 1: Overview (Gemini)
  log('  Layer 1/5: Overview (Gemini)...');
  research.overview = await runGeminiResearch(topic);
  writeFileSync(join(outputDir, 'research-1-overview.md'), research.overview);

  // Layers 2-5: targeted web searches
  for (const [index, layer] of SEARCH_LAYERS.entries()) {
    log(`  Layer ${index + 2}/5: ${layer.label}...`);
    research[layer.key] = await runBraveSearch(layer.query(company), layer.count);
    writeFileSync(join(outputDir, layer.file), research[layer.key]);
  }

  // Combine all research
  const combined = `# RESEARCH COMPILATION: ${company}

## PART 1: OVERVIEW
${research.overview}

## PART 2: FOUNDER STORIES & INTERVIEWS
${research.founders}

## PART 3: CONTEMPORARY COVERAGE & KEY DECISIONS
${research.decisions}

## PART 4: FINANCIAL & OPERATIONAL DETAILS
${research.financials}

## PART 5: STRUGGLES, CRISES & CRITICISM
${research.contrarian}
`;

  writeFileSync(join(outputDir, 'research-combined.md'), combined);
  log('All research layers complete');
  return combined;
}

/**
 * Distill raw research into facts, quotes, timeline and narrative threads
 * using o3, and write the result to `research-synthesis.md`.
 * Only the first 100,000 characters of `research` are sent.
 * @param {string} research
 * @param {string} company
 * @param {string} outputDir
 * @returns {Promise<string>} The synthesis markdown.
 */
async function synthesizeResearch(research, company, outputDir) {
  log('Synthesizing research with o3...');

  const prompt = `You are a research analyst preparing materials for a deep-dive podcast about ${company}.

From this research, extract:

1. **50 MOST SURPRISING FACTS** - specific numbers, dates, details that would make someone say "Wait, really?"

2. **10 BEST QUOTES** - actual quotes from founders, employees, or articles WITH attribution
   Format: "Quote here" — Person Name, Source, Year

3. **5 "WAIT REALLY?" MOMENTS** - the most counterintuitive or shocking facts

4. **KEY TIMELINE** - 15-20 most important dates with specific events

5. **NARRATIVE THREADS** - the 3-4 main story arcs that make this company interesting

6. **CONTRARIAN TAKES** - what critics say, what almost went wrong, the messy parts

7. **NUMBERS THAT MATTER** - specific metrics that tell the story (revenue, margins, users, etc.)

Be SPECIFIC. Include actual numbers, names, dates. No generic statements.

RESEARCH:
${research.slice(0, 100000)}`;

  const synthesis = await llmO3(prompt);
  writeFileSync(join(outputDir, 'research-synthesis.md'), synthesis);
  log('Research synthesis complete');
  return synthesis;
}

// ============ SCRIPT GENERATION ============

/**
 * Build the LLM prompt for one script section.
 * @param {{id: number, name: string, turns: number, description: string}} section
 * @param {string} research - Raw research; truncated to 25k (deep) or 10k chars.
 * @param {string} synthesis - Optional synthesis; truncated to 15k chars.
 * @param {string} topic
 * @param {string} hook - Selected opening hook (only used for the HOOK section).
 * @param {string} prevContext - Excerpt of the previous section, or ''.
 * @param {boolean} isDeep
 * @param {string[]} [usedFacts] - Facts already used in earlier sections.
 * @returns {string} The complete prompt.
 */
function buildSectionPrompt(section, research, synthesis, topic, hook, prevContext, isDeep, usedFacts = []) {
  const contextSize = isDeep ? 25000 : 10000;

  const introPrompt = section.name === 'INTRO' ? `
## INTRO REQUIREMENTS
- Start with "Welcome back to The Builders..."
- Maya introduces herself, then James
- Brief friendly banter about what excited them in the research
- Name the company: "Today we're diving into ${topic}"
- Tease 2-3 surprising things they'll cover
- End with natural transition to the hook
` : '';

  const hookLine = section.name === 'HOOK' ? `
## OPENING HOOK TO BUILD ON
"${hook}"
` : '';

  const synthesisSection = synthesis ? `
## KEY FACTS & QUOTES TO USE
${synthesis.slice(0, 15000)}
` : '';

  // DEDUPLICATION: List facts already used in previous sections
  const usedFactsSection = usedFacts.length > 0 ? `
## ⛔ FACTS ALREADY USED - DO NOT REPEAT THESE
The following facts, quotes, and statistics have already been mentioned in earlier sections.
DO NOT use them again. Find NEW facts from the research.

${usedFacts.map((f, i) => `${i + 1}. ${f}`).join('\n')}

** IMPORTANT: Using any fact from the above list is a critical error. Use DIFFERENT facts.**
` : '';

  // The speaker tags named below must match the parser in main()
  // (/<(Person[12])>…<\/Person[12]>/) and countTurns().
  return `# Generate Section ${section.id}: ${section.name}

${BIBLE}

## YOUR TASK
Write EXACTLY ${section.turns} dialogue turns for the ${section.name} section.
This should feel like two friends discovering a story together, NOT a lecture.

## SECTION GOAL
${section.description}
${introPrompt}
${hookLine}
${usedFactsSection}
## FORMAT RULES
- Use <Person1> (Maya) and <Person2> (James) XML tags
- Each turn: 2-5 sentences - real conversation, not speeches
- Include AT LEAST 3 interruptions ("Wait—", "Hold on—", "Back up—")
- Include AT LEAST 3 genuine reactions ("That's insane", "Wait, really?", "I had no idea")
- USE SPECIFIC QUOTES from the research with attribution
- USE SPECIFIC NUMBERS and dates
- Maya asks technical "how does this work" questions
- James provides strategic context and patterns
- They BUILD on each other, not just take turns
- **DO NOT REPEAT** any facts from earlier sections
${synthesisSection}
## RESEARCH
${research.slice(0, contextSize)}

${prevContext ? `## PREVIOUS CONTEXT\n${prevContext}\n` : ''}
---
Generate EXACTLY ${section.turns} turns. Start with <Person1>.
Include at least 3 NEW specific facts/quotes from the research (not used before).
Make it feel like genuine discovery.`;
}

/**
 * Extract key facts from a section for deduplication.
 * Asks a small model for a JSON array of short facts; if that call fails or
 * returns something unusable, falls back to regex extraction of attributed
 * quotes and numbers.
 * @param {string} sectionText
 * @returns {Promise<string[]>} Short fact strings.
 */
async function extractFactsFromSection(sectionText) {
  const prompt = `Extract the KEY FACTS mentioned in this podcast section. List each unique fact as a short phrase.

Focus on:
- Specific numbers (dollars, percentages, counts)
- Specific dates and years
- Direct quotes with attribution
- Named events or milestones
- Specific products, prices, or metrics

Return ONLY a JSON array of strings, each being a short fact (10-20 words max).
Example: ["$1.50 hot dog price unchanged since 1985", "93.3% renewal rate in US/Canada"]

Section:
${sectionText}`;

  try {
    const result = await llm(prompt, 'gpt-4o-mini', 2000);
    const match = result.match(/\[[\s\S]*\]/);
    if (match) {
      const parsed = JSON.parse(match[0]);
      // Guard against valid JSON of the wrong shape (objects, numbers, ...).
      if (Array.isArray(parsed)) {
        return parsed.filter((fact) => typeof fact === 'string' && fact.trim() !== '');
      }
    }
  } catch (err) {
    log(`    ⚠️  LLM fact extraction failed (${String(err.message).split('\n')[0]}); using regex fallback`);
  }

  // Fallback extraction
  const facts = [];

  // Extract quotes
  const quotes = sectionText.match(/"[^"]+"\s*—[^<]+/g) || [];
  facts.push(...quotes.slice(0, 5).map((q) => q.slice(0, 100)));

  // Extract numbers with context
  const numbers = sectionText.match(/\$[\d,.]+ (?:million|billion|percent|%)|[\d,]+% |[\d,]+ (?:SKU|warehouse|employee|member|year)/gi) || [];
  facts.push(...[...new Set(numbers)].slice(0, 10));

  return facts;
}

// ============ TTS ============

/**
 * Synthesize one turn to `turn-NNNN.mp3`.
 * @param {{speaker: string, text: string}} turn
 * @param {number} index - Zero-based position in the episode.
 * @param {string} audioDir
 * @returns {Promise<string|null>} Output path, or null when synthesis failed.
 */
async function synthesizeTurn(turn, index, audioDir) {
  const num = String(index + 1).padStart(4, '0');
  const outPath = join(audioDir, `turn-${num}.mp3`);

  try {
    const res = await fetch(SPEECH_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${OPENAI_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'tts-1-hd',
        input: turn.text.slice(0, 4096),
        voice: VOICES[turn.speaker],
      }),
    });

    if (!res.ok) {
      log(`  ⚠️  TTS failed for turn ${num}: HTTP ${res.status}`);
      return null;
    }

    writeFileSync(outPath, Buffer.from(await res.arrayBuffer()));
    return outPath;
  } catch (err) {
    // One failed request must not abort the rest of the batch.
    log(`  ⚠️  TTS failed for turn ${num}: ${err.message}`);
    return null;
  }
}

/**
 * Generate speech for every turn and merge the clips into `episode.mp3`.
 * Requests run in batches of TTS_BATCH_SIZE. Any existing `audio/`
 * directory under `outputDir` is deleted first.
 * @param {{speaker: string, text: string}[]} turns
 * @param {string} outputDir
 * @returns {Promise<{path: string, duration: number}>} Episode path and duration in seconds.
 * @throws {Error} If no audio clip could be produced, or ffmpeg/ffprobe fails.
 */
async function generateTTS(turns, outputDir) {
  const audioDir = join(outputDir, 'audio');
  if (existsSync(audioDir)) rmSync(audioDir, { recursive: true });
  mkdirSync(audioDir, { recursive: true });

  log(`Generating TTS for ${turns.length} turns...`);
  const start = Date.now();
  let failedCount = 0;

  for (let i = 0; i < turns.length; i += TTS_BATCH_SIZE) {
    const batch = turns.slice(i, i + TTS_BATCH_SIZE);
    const results = await Promise.all(batch.map((turn, j) => synthesizeTurn(turn, i + j, audioDir)));
    failedCount += results.filter((outPath) => outPath === null).length;
    log(`  ${Math.min(i + TTS_BATCH_SIZE, turns.length)}/${turns.length} turns`);
  }

  const elapsed = ((Date.now() - start) / 1000).toFixed(1);
  log(`TTS complete in ${elapsed}s`);
  if (failedCount > 0) {
    log(`⚠️  ${failedCount} turn(s) failed TTS and will be missing from the episode`);
  }

  // Merge with ffmpeg
  const files = readdirSync(audioDir).filter((f) => f.endsWith('.mp3')).sort();
  if (files.length === 0) {
    throw new Error('TTS produced no audio files; nothing to merge');
  }

  // The concat demuxer resolves relative entries against the list file's own
  // directory, so bare filenames work for relative and absolute outputDir alike.
  const listPath = join(audioDir, 'files.txt');
  writeFileSync(listPath, files.map((f) => `file '${f}'`).join('\n'));

  const episodePath = join(outputDir, 'episode.mp3');
  execFileSync(
    'ffmpeg',
    ['-y', '-f', 'concat', '-safe', '0', '-i', listPath, '-c', 'copy', episodePath],
    { stdio: 'ignore' },
  );

  const duration = execFileSync(
    'ffprobe',
    ['-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', episodePath],
    { encoding: 'utf-8' },
  ).trim();

  return { path: episodePath, duration: parseFloat(duration) };
}

// ============ CLI & CHECKPOINTS ============

const USAGE = `
Usage: node generate.mjs "<topic>" [options]

Modes:
  --quick          15-20 min episode, single research pass (default)
  --deep           45-60 min episode, multi-layer research + synthesis

Options:
  -o, --output <dir>    Output directory (default: ./output)
  -m, --model <model>   LLM model (default: gpt-5.2)
  --skip-research       Skip research phase (use existing)
  --skip-tts            Skip TTS generation
  --help                Show this help

Features:
  - Auto-resume: If process dies, re-run same command to resume from checkpoint
  - Deduplication: Tracks facts across sections to prevent repetition
  - Checkpoints saved to: <output>/checkpoints/

Examples:
  node generate.mjs "Costco" --quick -o /tmp/costco
  node generate.mjs "OpenAI" --deep -o /tmp/openai

  # Resume interrupted generation (just re-run same command):
  node generate.mjs "OpenAI" --deep -o /tmp/openai
`;

/**
 * Parse command-line arguments.
 * The topic is the first positional argument, wherever it appears.
 * @param {string[]} args - process.argv.slice(2).
 * @returns {{topic: string|undefined, outputDir: string, model: string, isDeep: boolean,
 *            skipResearch: boolean, skipTTS: boolean, ignored: string[]}}
 * @throws {Error} When -o/-m is given without a value.
 */
function parseCliArgs(args) {
  const options = {
    topic: undefined,
    outputDir: './output',
    model: DEFAULT_MODEL,
    isDeep: false,
    skipResearch: false,
    skipTTS: false,
    ignored: [],
  };

  const takeValue = (index, flag) => {
    const value = args[index];
    if (value === undefined || value.startsWith('-')) {
      throw new Error(`Option ${flag} requires a value`);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case '-o':
      case '--output':
        options.outputDir = takeValue(++i, arg);
        break;
      case '-m':
      case '--model':
        options.model = takeValue(++i, arg);
        break;
      case '--deep':
        options.isDeep = true;
        break;
      case '--quick':
        // Default mode; --deep wins if both are given.
        break;
      case '--skip-research':
        options.skipResearch = true;
        break;
      case '--skip-tts':
        options.skipTTS = true;
        break;
      default:
        if (arg.startsWith('-')) {
          options.ignored.push(arg);
        } else if (options.topic === undefined) {
          options.topic = arg;
        } else {
          options.ignored.push(arg);
        }
    }
  }

  return options;
}

/**
 * Load resume state from the checkpoint directory.
 * Returns a fresh state when there is no checkpoint, it cannot be parsed, or
 * it was recorded for a different topic/mode. If a completed section's file
 * is missing, resumes from that section so the script has no holes.
 * @param {string} checkpointDir
 * @param {string} statePath - Path of state.json.
 * @param {{topic: string, mode: string, sectionCount: number}} expected
 * @returns {{startSection: number, usedFacts: string[], prevContext: string, sections: string[]}}
 */
function loadCheckpoint(checkpointDir, statePath, expected) {
  const fresh = () => ({ startSection: 0, usedFacts: [], prevContext: '', sections: [] });
  if (!existsSync(statePath)) return fresh();

  try {
    const state = JSON.parse(readFileSync(statePath, 'utf-8'));
    if (state === null || typeof state !== 'object') {
      throw new Error('state.json is not an object');
    }

    // Checkpoints written by older versions carry no topic/mode; accept those.
    const topicMismatch = state.topic !== undefined && state.topic !== expected.topic;
    const modeMismatch = state.mode !== undefined && state.mode !== expected.mode;
    if (topicMismatch || modeMismatch) {
      log('⚠️  Checkpoint is for a different topic/mode, starting fresh');
      return fresh();
    }

    const completed = Math.min(
      Number.isInteger(state.completedSections) ? state.completedSections : 0,
      expected.sectionCount,
    );

    const sections = [];
    for (let i = 0; i < completed; i++) {
      const sectionPath = join(checkpointDir, `section-${i}.txt`);
      if (!existsSync(sectionPath)) {
        log(`⚠️  Checkpoint file for section ${i} is missing; regenerating from there`);
        break;
      }
      sections.push(readFileSync(sectionPath, 'utf-8'));
    }

    return {
      startSection: sections.length,
      usedFacts: Array.isArray(state.usedFacts) ? state.usedFacts : [],
      prevContext: typeof state.prevContext === 'string' ? state.prevContext : '',
      sections,
    };
  } catch (err) {
    log('⚠️  Checkpoint corrupted, starting fresh');
    return fresh();
  }
}

// ============ MAIN ============

/**
 * CLI entry point: research, hook, sections (with checkpoints), then TTS.
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help')) {
    console.log(USAGE);
    process.exit(0);
  }

  const { topic, outputDir, model, isDeep, skipResearch, skipTTS, ignored } = parseCliArgs(args);

  if (topic === undefined) {
    throw new Error('Missing <topic>. Run with --help for usage.');
  }
  if (!OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is not set');
  }

  // Extract company name from topic: the first word before any ':'.
  const company = topic.split(':')[0].split(' ')[0];

  const SECTIONS = isDeep ? SECTIONS_DEEP : SECTIONS_QUICK;
  const targetTurns = SECTIONS.reduce((sum, s) => sum + s.turns, 0);

  mkdirSync(outputDir, { recursive: true });

  console.log(renderBox([[
    centered('The Builders Podcast Generator'),
    `  Mode: ${isDeep ? 'DEEP DIVE (45-60 min)' : 'QUICK DIVE (15-20 min)'}`,
    `  Target: ${targetTurns} turns`,
  ]]));

  log(`Topic: ${topic}`);
  log(`Output: ${outputDir}`);
  log(`Model: ${model}`);
  if (ignored.length > 0) {
    log(`⚠️  Ignoring unrecognized arguments: ${ignored.join(' ')}`);
  }

  // ---- RESEARCH ----
  let research;
  let synthesis = '';

  const researchPath = join(outputDir, isDeep ? 'research-combined.md' : 'research.md');
  const synthesisPath = join(outputDir, 'research-synthesis.md');

  if (skipResearch && existsSync(researchPath)) {
    log('Using existing research...');
    research = readFileSync(researchPath, 'utf-8');
    if (isDeep && existsSync(synthesisPath)) {
      synthesis = readFileSync(synthesisPath, 'utf-8');
    }
  } else {
    if (skipResearch) {
      log(`No existing research at ${researchPath}; running research anyway`);
    }
    if (isDeep) {
      // Multi-layer research for deep dive
      research = await multiLayerResearch(topic, company, outputDir);
      synthesis = await synthesizeResearch(research, company, outputDir);
    } else {
      // Single pass for quick dive
      research = await runGeminiResearch(topic);
      writeFileSync(researchPath, research);
    }
  }

  // ---- GENERATE HOOK ----
  log('Generating hook...');
  const hookPrompt = `Based on this research about ${topic}, generate 3 compelling opening hooks:

1. A SCENE hook - put us in a specific moment (boardroom, product launch, near-bankruptcy)
2. A DATA hook - a surprising statistic that reframes everything
3. A QUESTION hook - a provocative central question

Each should be 2-3 sentences, vivid, specific. Mark the best one with [SELECTED].
${isDeep ? 'Use specific details from the research - names, dates, numbers.' : ''}

Research:
${research.slice(0, 15000)}`;

  const hookResponse = await llm(hookPrompt, model);
  writeFileSync(join(outputDir, 'hooks.md'), hookResponse);

  // Extract selected hook: the first quoted span after [SELECTED]
  // (straight or curly quotes).
  const hookMatch = hookResponse.match(/\[SELECTED\][\s\S]*?["“]([^"”]+)["”]/);
  const hook = hookMatch ? hookMatch[1] : 'This is a story that will change how you think about business.';
  log(`Hook: "${hook.slice(0, 80)}..."`);

  // ---- GENERATE SECTIONS (with checkpointing) ----
  log(`Generating ${SECTIONS.length} script sections...`);

  // Checkpoint paths
  const checkpointDir = join(outputDir, 'checkpoints');
  const checkpointState = join(checkpointDir, 'state.json');
  mkdirSync(checkpointDir, { recursive: true });

  const mode = isDeep ? 'deep' : 'quick';
  const checkpoint = loadCheckpoint(checkpointDir, checkpointState, {
    topic,
    mode,
    sectionCount: SECTIONS.length,
  });

  const allSections = checkpoint.sections;
  const startSection = checkpoint.startSection;
  let prevContext = checkpoint.prevContext;
  let usedFacts = checkpoint.usedFacts; // Track facts across sections for deduplication
  let totalTurns = allSections.reduce((sum, content) => sum + countTurns(content), 0);

  if (startSection > 0) {
    log(`📂 Resuming from checkpoint: section ${startSection}/${SECTIONS.length}`);
    log(`   Loaded ${allSections.length} existing sections (${totalTurns} turns)`);
  }

  for (const section of SECTIONS) {
    // Skip already completed sections
    if (section.id < startSection) {
      continue;
    }

    const start = Date.now();
    log(`  Section ${section.id}: ${section.name} (target: ${section.turns})...`);

    const prompt = buildSectionPrompt(
      section, research, synthesis, topic, hook, prevContext, isDeep, usedFacts,
    );

    const result = await llm(prompt, model, isDeep ? 12000 : 8000);
    const turns = countTurns(result);
    const elapsed = ((Date.now() - start) / 1000).toFixed(1);

    log(`    → ${turns} turns in ${elapsed}s`);

    allSections.push(result);
    // Carry over the END of this section: the next section continues from
    // where this one stopped, so the closing turns are the relevant context.
    prevContext = result.slice(isDeep ? -1500 : -800);
    totalTurns += turns;

    // Save section checkpoint immediately
    writeFileSync(join(checkpointDir, `section-${section.id}.txt`), result);

    // Extract facts from this section to prevent repetition in future sections
    if (isDeep && section.id < SECTIONS.length - 1) {
      log('    Extracting facts for deduplication...');
      const newFacts = await extractFactsFromSection(result);
      usedFacts = [...usedFacts, ...newFacts].slice(-100); // Keep last 100 facts
      log(`    ${newFacts.length} facts tracked (${usedFacts.length} total)`);
    }

    // Save checkpoint state after each section
    writeFileSync(checkpointState, JSON.stringify({
      topic,
      mode,
      completedSections: section.id + 1,
      usedFacts,
      prevContext,
      timestamp: new Date().toISOString(),
    }, null, 2));
    log('    💾 Checkpoint saved');
  }

  // Combine script
  const fullScript = allSections.join('\n\n');
  writeFileSync(join(outputDir, 'script.txt'), fullScript);
  log(`Script complete: ${totalTurns} total turns`);

  // ---- TTS ----
  if (skipTTS) {
    log('TTS skipped');
    return;
  }

  // Parse turns; empty turns are dropped because the speech API rejects empty input.
  const turns = [];
  for (const match of fullScript.matchAll(/<(Person[12])>([\s\S]*?)<\/Person[12]>/g)) {
    const text = match[2].trim();
    if (text !== '') {
      turns.push({ speaker: match[1], text });
    }
  }

  log(`Parsed ${turns.length} speaker turns`);
  log(`  Maya (Person1): ${turns.filter((t) => t.speaker === 'Person1').length}`);
  log(`  James (Person2): ${turns.filter((t) => t.speaker === 'Person2').length}`);

  const { path, duration } = await generateTTS(turns, outputDir);

  console.log(renderBox([
    [centered('Generation Complete')],
    [
      `  Mode: ${isDeep ? 'DEEP DIVE' : 'QUICK DIVE'}`,
      `  Total turns:  ${totalTurns}`,
      `  Duration:  ${(duration / 60).toFixed(1)} minutes`,
      `  Output:  ${path.slice(-49)}`,
    ],
  ]));
}

main().catch((err) => {
  console.error('Error:', err.message);
  process.exit(1);
});