fix: retry embedding 5xx errors

This commit is contained in:
Peter Steinberger
2026-01-17 22:48:50 +00:00
parent 9ca4c10e59
commit 7d2e510087
4 changed files with 59 additions and 6 deletions

View File

@@ -138,11 +138,18 @@ describe("memory indexing with OpenAI batches", () => {
expect(result.manager).not.toBeNull();
if (!result.manager) throw new Error("manager missing");
manager = result.manager;
await manager.sync({ force: true });
const labels: string[] = [];
await manager.sync({
force: true,
progress: (update) => {
if (update.label) labels.push(update.label);
},
});
const status = manager.status();
expect(status.chunks).toBeGreaterThan(0);
expect(embedBatch).not.toHaveBeenCalled();
expect(fetchMock).toHaveBeenCalled();
expect(labels.some((label) => label.toLowerCase().includes("batch"))).toBe(true);
});
});

View File

@@ -192,6 +192,47 @@ describe("memory embedding batches", () => {
expect(calls).toBe(3);
}, 10000);
// Simulates two consecutive Cloudflare-style 502 failures from the embeddings
// provider and verifies the index manager keeps retrying until the third
// attempt succeeds, rather than aborting the sync.
it("retries embeddings on transient 5xx errors", async () => {
  const row = "e".repeat(120);
  const content = Array.from({ length: 12 }, () => row).join("\n");
  await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-08.md"), content);
  let attempts = 0;
  embedBatch.mockImplementation(async (texts: string[]) => {
    attempts += 1;
    // Fail the first two calls with a transient gateway error, then succeed.
    if (attempts < 3) {
      throw new Error("openai embeddings failed: 502 Bad Gateway (cloudflare)");
    }
    return texts.map(() => [0, 1, 0]);
  });
  const memorySearch = {
    provider: "openai",
    model: "mock-embed",
    store: { path: indexPath },
    chunking: { tokens: 200, overlap: 0 },
    sync: { watch: false, onSessionStart: false, onSearch: false },
    query: { minScore: 0 },
  };
  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch,
      },
      list: [{ id: "main", default: true }],
    },
  };
  const result = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(result.manager).not.toBeNull();
  if (!result.manager) throw new Error("manager missing");
  manager = result.manager;
  await manager.sync({ force: true });
  // Two failures followed by one success → exactly three provider calls.
  expect(attempts).toBe(3);
}, 10000);
it("skips empty chunks so embeddings input stays valid", async () => {
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n");

View File

@@ -733,7 +733,7 @@ export class MemoryIndexManager {
params.progress.report({
completed: params.progress.completed,
total: params.progress.total,
label: "Indexing memory files…",
label: this.batch.enabled ? "Indexing memory files (batch)..." : "Indexing memory files…",
});
}
@@ -784,7 +784,7 @@ export class MemoryIndexManager {
params.progress.report({
completed: params.progress.completed,
total: params.progress.total,
label: "Indexing session files…",
label: this.batch.enabled ? "Indexing session files (batch)..." : "Indexing session files…",
});
}
@@ -1357,7 +1357,7 @@ export class MemoryIndexManager {
return await this.provider.embedBatch(texts);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (!this.isRateLimitError(message) || attempt >= EMBEDDING_RETRY_MAX_ATTEMPTS) {
if (!this.isRetryableEmbeddingError(message) || attempt >= EMBEDDING_RETRY_MAX_ATTEMPTS) {
throw err;
}
const waitMs = Math.min(
@@ -1372,8 +1372,10 @@ export class MemoryIndexManager {
}
}
private isRateLimitError(message: string): boolean {
return /(rate[_ ]limit|too many requests|429|resource has been exhausted)/i.test(message);
/**
 * Returns true when an embedding failure message looks transient and is worth
 * retrying: rate limiting ("rate limit" / "too many requests" / 429 / quota
 * exhaustion) or a server-side 5xx status (including Cloudflare-fronted
 * gateway failures, whose messages may only mention "cloudflare").
 *
 * Word boundaries around the 5xx pattern prevent false positives on larger
 * numbers embedded in error text (e.g. "1536" dimensions contains "536" and
 * would otherwise match a bare `5\d\d`, retrying a non-transient error).
 */
private isRetryableEmbeddingError(message: string): boolean {
  return /(rate[_ ]limit|too many requests|429|\b5\d\d\b|cloudflare)/i.test(
    message,
  );
}
private async runWithConcurrency<T>(tasks: Array<() => Promise<T>>, limit: number): Promise<T[]> {