feat(macos): add Canvas A2UI renderer
vendor/a2ui/specification/0.8/eval/src/index.ts (vendored, new file, 363 additions)
@@ -0,0 +1,363 @@
/*
Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import { componentGeneratorFlow, ai } from "./flows";
import * as fs from "fs";
import * as path from "path";
import { modelsToTest } from "./models";
import { prompts, TestPrompt } from "./prompts";
import { validateSchema } from "./validator";

interface InferenceResult {
  modelName: string;
  prompt: TestPrompt;
  component: any;
  error: any;
  latency: number;
  validationResults: string[];
  runNumber: number;
}

function generateSummary(
  resultsByModel: Record<string, InferenceResult[]>,
  results: InferenceResult[],
): string {
  const promptNameWidth = 40;
  const latencyWidth = 20;
  const failedRunsWidth = 15;
  const toolErrorRunsWidth = 20;

  let summary = "# Evaluation Summary";
  for (const modelName in resultsByModel) {
    summary += `\n\n## Model: ${modelName}\n\n`;
    const header = `| ${"Prompt Name".padEnd(
      promptNameWidth,
    )} | ${"Avg Latency (ms)".padEnd(latencyWidth)} | ${"Failed Runs".padEnd(
      failedRunsWidth,
    )} | ${"Tool Error Runs".padEnd(toolErrorRunsWidth)} |`;
    const divider = `|${"-".repeat(promptNameWidth + 2)}|${"-".repeat(
      latencyWidth + 2,
    )}|${"-".repeat(failedRunsWidth + 2)}|${"-".repeat(
      toolErrorRunsWidth + 2,
    )}|`;
    summary += header;
    summary += `\n${divider}`;

    const promptsInModel = resultsByModel[modelName].reduce(
      (acc, result) => {
        if (!acc[result.prompt.name]) {
          acc[result.prompt.name] = [];
        }
        acc[result.prompt.name].push(result);
        return acc;
      },
      {} as Record<string, InferenceResult[]>,
    );

    let totalModelFailedRuns = 0;

    for (const promptName in promptsInModel) {
      const runs = promptsInModel[promptName];
      const totalRuns = runs.length;
      const errorRuns = runs.filter((r) => r.error).length;
      const failedRuns = runs.filter(
        (r) => r.error || r.validationResults.length > 0,
      ).length;
      const totalLatency = runs.reduce((acc, r) => acc + r.latency, 0);
      const avgLatency = (totalLatency / totalRuns).toFixed(0);

      totalModelFailedRuns += failedRuns;

      const failedRunsStr =
        failedRuns > 0 ? `${failedRuns} / ${totalRuns}` : "";
      const errorRunsStr = errorRuns > 0 ? `${errorRuns} / ${totalRuns}` : "";

      summary += `\n| ${promptName.padEnd(
        promptNameWidth,
      )} | ${avgLatency.padEnd(latencyWidth)} | ${failedRunsStr.padEnd(
        failedRunsWidth,
      )} | ${errorRunsStr.padEnd(toolErrorRunsWidth)} |`;
    }

    const totalRunsForModel = resultsByModel[modelName].length;
    summary += `\n\n**Total failed runs:** ${totalModelFailedRuns} / ${totalRunsForModel}`;
  }

  summary += "\n\n---\n\n## Overall Summary\n";
  const totalRuns = results.length;
  const totalToolErrorRuns = results.filter((r) => r.error).length;
  const totalRunsWithAnyFailure = results.filter(
    (r) => r.error || r.validationResults.length > 0,
  ).length;
  const modelsWithFailures = [
    ...new Set(
      results
        .filter((r) => r.error || r.validationResults.length > 0)
        .map((r) => r.modelName),
    ),
  ].join(", ");

  summary += `\n- **Number of tool error runs:** ${totalToolErrorRuns} / ${totalRuns}`;
  summary += `\n- **Number of runs with any failure (tool error or validation):** ${totalRunsWithAnyFailure} / ${totalRuns}`;
  const latencies = results.map((r) => r.latency).sort((a, b) => a - b);
  const totalLatency = latencies.reduce((acc, l) => acc + l, 0);
  const meanLatency = (totalLatency / totalRuns).toFixed(0);
  let medianLatency = 0;
  if (latencies.length > 0) {
    const mid = Math.floor(latencies.length / 2);
    if (latencies.length % 2 === 0) {
      medianLatency = (latencies[mid - 1] + latencies[mid]) / 2;
    } else {
      medianLatency = latencies[mid];
    }
  }

  summary += `\n- **Mean Latency:** ${meanLatency} ms`;
  summary += `\n- **Median Latency:** ${medianLatency} ms`;
  if (modelsWithFailures) {
    summary += `\n- **Models with at least one failure:** ${modelsWithFailures}`;
  }
  return summary;
}

import yargs from "yargs";
import { hideBin } from "yargs/helpers";

// Run the flow
async function main() {
  const argv = await yargs(hideBin(process.argv))
    .option("verbose", {
      alias: "v",
      type: "boolean",
      description: "Run with verbose logging",
      default: false,
    })
    .option("keep", {
      type: "string",
      description:
        "Directory to keep output files. If no path is provided, a temporary directory will be created.",
      coerce: (arg) => (arg === undefined ? true : arg),
    })
    .option("runs-per-prompt", {
      type: "number",
      description: "Number of times to run each prompt",
      default: 1,
    })
    .option("model", {
      type: "string",
      array: true,
      description: "Filter models by exact name",
      default: [],
      choices: modelsToTest.map((m) => m.name),
    })
    .option("prompt", {
      type: "string",
      description: "Filter prompts by name prefix",
    })
    .help()
    .alias("h", "help").argv;

  const verbose = argv.verbose;
  const keep = argv.keep;
  let outputDir: string | null = null;

  if (keep) {
    if (typeof keep === "string") {
      outputDir = keep;
    } else {
      outputDir = fs.mkdtempSync(path.join(process.cwd(), "a2ui-eval-"));
    }
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }
    console.log(`Keeping output in: ${outputDir}`);
  }

  const runsPerPrompt = argv["runs-per-prompt"];

  let filteredModels = modelsToTest;
  if (argv.model && argv.model.length > 0) {
    const modelNames = argv.model as string[];
    filteredModels = modelsToTest.filter((m) => modelNames.includes(m.name));
    if (filteredModels.length === 0) {
      console.error(`No models found matching: ${modelNames.join(", ")}.`);
      process.exit(1);
    }
  }

  let filteredPrompts = prompts;
  if (argv.prompt) {
    filteredPrompts = prompts.filter((p) =>
      p.name.startsWith(argv.prompt as string)
    );
    if (filteredPrompts.length === 0) {
      console.error(`No prompt found with prefix "${argv.prompt}".`);
      process.exit(1);
    }
  }

  const generationPromises: Promise<InferenceResult>[] = [];

  for (const prompt of filteredPrompts) {
    const schemaString = fs.readFileSync(
      path.join(__dirname, prompt.schemaPath),
      "utf-8"
    );
    const schema = JSON.parse(schemaString);
    for (const modelConfig of filteredModels) {
      const modelDirName = modelConfig.name.replace(/[\/:]/g, "_");
      const modelOutputDir = outputDir
        ? path.join(outputDir, modelDirName)
        : null;
      if (modelOutputDir && !fs.existsSync(modelOutputDir)) {
        fs.mkdirSync(modelOutputDir, { recursive: true });
      }
      for (let i = 1; i <= runsPerPrompt; i++) {
        console.log(
          `Queueing generation for model: ${modelConfig.name}, prompt: ${prompt.name} (run ${i})`
        );
        const startTime = Date.now();
        generationPromises.push(
          componentGeneratorFlow({
            prompt: prompt.promptText,
            model: modelConfig.model,
            config: modelConfig.config,
            schema,
          })
            .then((component) => {
              if (modelOutputDir) {
                const inputPath = path.join(
                  modelOutputDir,
                  `${prompt.name}.input.txt`
                );
                fs.writeFileSync(inputPath, prompt.promptText);

                const outputPath = path.join(
                  modelOutputDir,
                  `${prompt.name}.output.json`
                );
                fs.writeFileSync(
                  outputPath,
                  JSON.stringify(component, null, 2)
                );
              }
              const validationResults = validateSchema(
                component,
                prompt.schemaPath,
                prompt.matchers
              );
              return {
                modelName: modelConfig.name,
                prompt,
                component,
                error: null,
                latency: Date.now() - startTime,
                validationResults,
                runNumber: i,
              };
            })
            .catch((error) => {
              if (modelOutputDir) {
                const inputPath = path.join(
                  modelOutputDir,
                  `${prompt.name}.input.txt`
                );
                fs.writeFileSync(inputPath, prompt.promptText);

                const errorPath = path.join(
                  modelOutputDir,
                  `${prompt.name}.error.json`
                );
                const errorOutput = {
                  message: error.message,
                  stack: error.stack,
                  ...error,
                };
                fs.writeFileSync(
                  errorPath,
                  JSON.stringify(errorOutput, null, 2)
                );
              }
              return {
                modelName: modelConfig.name,
                prompt,
                component: null,
                error,
                latency: Date.now() - startTime,
                validationResults: [],
                runNumber: i,
              };
            })
        );
      }
    }
  }

  const results = await Promise.all(generationPromises);

  const resultsByModel: Record<string, InferenceResult[]> = {};

  for (const result of results) {
    if (!resultsByModel[result.modelName]) {
      resultsByModel[result.modelName] = [];
    }
    resultsByModel[result.modelName].push(result);
  }

  console.log("\n--- Generation Results ---");
  for (const modelName in resultsByModel) {
    for (const result of resultsByModel[modelName]) {
      const hasError = !!result.error;
      const hasValidationFailures = result.validationResults.length > 0;
      const hasComponent = !!result.component;

      if (hasError || hasValidationFailures || (verbose && hasComponent)) {
        console.log(`\n----------------------------------------`);
        console.log(`Model: ${modelName}`);
        console.log(`----------------------------------------`);
        console.log(`\nQuery: ${result.prompt.name} (run ${result.runNumber})`);

        if (hasError) {
          console.error("Error generating component:", result.error);
        } else if (hasComponent) {
          if (hasValidationFailures) {
            console.log("Validation Failures:");
            result.validationResults.forEach((failure) =>
              console.log(`- ${failure}`)
            );
          }
          if (verbose) {
            if (hasValidationFailures) {
              console.log("Generated schema:");
              console.log(JSON.stringify(result.component, null, 2));
            }
          }
        }
      }
    }
  }

  const summary = generateSummary(resultsByModel, results);
  console.log(summary);
  if (outputDir) {
    const summaryPath = path.join(outputDir, "summary.md");
    fs.writeFileSync(summaryPath, summary);
  }
}

if (require.main === module) {
  main().catch(console.error);
}
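For reference, a minimal sketch of how this eval entry point might be invoked from the eval package directory. The ts-node runner and the ./eval-output path are assumptions for illustration; the flags are the ones declared by the yargs options above:

    npx ts-node src/index.ts --runs-per-prompt 3 --keep ./eval-output --verbose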