fix: retry openai batch indexing
This commit is contained in:
@@ -148,4 +148,102 @@ describe("memory indexing with OpenAI batches", () => {
|
|||||||
expect(fetchMock).toHaveBeenCalled();
|
expect(fetchMock).toHaveBeenCalled();
|
||||||
expect(labels.some((label) => label.toLowerCase().includes("batch"))).toBe(true);
|
expect(labels.some((label) => label.toLowerCase().includes("batch"))).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("retries OpenAI batch create on transient failures", async () => {
|
||||||
|
const content = ["retry", "the", "batch"].join("\n\n");
|
||||||
|
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-08.md"), content);
|
||||||
|
|
||||||
|
let uploadedRequests: Array<{ custom_id?: string }> = [];
|
||||||
|
let batchCreates = 0;
|
||||||
|
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||||
|
const url =
|
||||||
|
typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
|
||||||
|
if (url.endsWith("/files")) {
|
||||||
|
const body = init?.body;
|
||||||
|
if (!(body instanceof FormData)) {
|
||||||
|
throw new Error("expected FormData upload");
|
||||||
|
}
|
||||||
|
for (const [key, value] of body.entries()) {
|
||||||
|
if (key !== "file") continue;
|
||||||
|
if (typeof value === "string") {
|
||||||
|
uploadedRequests = value
|
||||||
|
.split("\n")
|
||||||
|
.filter(Boolean)
|
||||||
|
.map((line) => JSON.parse(line) as { custom_id?: string });
|
||||||
|
} else {
|
||||||
|
const text = await value.text();
|
||||||
|
uploadedRequests = text
|
||||||
|
.split("\n")
|
||||||
|
.filter(Boolean)
|
||||||
|
.map((line) => JSON.parse(line) as { custom_id?: string });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new Response(JSON.stringify({ id: "file_1" }), {
|
||||||
|
status: 200,
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (url.endsWith("/batches")) {
|
||||||
|
batchCreates += 1;
|
||||||
|
if (batchCreates === 1) {
|
||||||
|
return new Response("upstream connect error", { status: 503 });
|
||||||
|
}
|
||||||
|
return new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
|
||||||
|
status: 200,
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (url.endsWith("/batches/batch_1")) {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({ id: "batch_1", status: "completed", output_file_id: "file_out" }),
|
||||||
|
{ status: 200, headers: { "Content-Type": "application/json" } },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (url.endsWith("/files/file_out/content")) {
|
||||||
|
const lines = uploadedRequests.map((request, index) =>
|
||||||
|
JSON.stringify({
|
||||||
|
custom_id: request.custom_id,
|
||||||
|
response: {
|
||||||
|
status_code: 200,
|
||||||
|
body: { data: [{ embedding: [index + 1, 0, 0], index: 0 }] },
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return new Response(lines.join("\n"), {
|
||||||
|
status: 200,
|
||||||
|
headers: { "Content-Type": "application/jsonl" },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
throw new Error(`unexpected fetch ${url}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
|
||||||
|
const cfg = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
workspace: workspaceDir,
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
store: { path: indexPath },
|
||||||
|
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||||
|
query: { minScore: 0 },
|
||||||
|
remote: { batch: { enabled: true, wait: true } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
list: [{ id: "main", default: true }],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||||
|
expect(result.manager).not.toBeNull();
|
||||||
|
if (!result.manager) throw new Error("manager missing");
|
||||||
|
manager = result.manager;
|
||||||
|
await manager.sync({ force: true });
|
||||||
|
|
||||||
|
const status = manager.status();
|
||||||
|
expect(status.chunks).toBeGreaterThan(0);
|
||||||
|
expect(batchCreates).toBe(2);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import { retryAsync } from "../infra/retry.js";
|
||||||
import type { OpenAiEmbeddingClient } from "./embeddings.js";
|
import type { OpenAiEmbeddingClient } from "./embeddings.js";
|
||||||
import { hashText } from "./internal.js";
|
import { hashText } from "./internal.js";
|
||||||
|
|
||||||
@@ -33,6 +34,30 @@ export type OpenAiBatchOutputLine = {
|
|||||||
export const OPENAI_BATCH_ENDPOINT = "/v1/embeddings";
|
export const OPENAI_BATCH_ENDPOINT = "/v1/embeddings";
|
||||||
const OPENAI_BATCH_COMPLETION_WINDOW = "24h";
|
const OPENAI_BATCH_COMPLETION_WINDOW = "24h";
|
||||||
const OPENAI_BATCH_MAX_REQUESTS = 50000;
|
const OPENAI_BATCH_MAX_REQUESTS = 50000;
|
||||||
|
const OPENAI_BATCH_RETRY = {
|
||||||
|
attempts: 3,
|
||||||
|
minDelayMs: 500,
|
||||||
|
maxDelayMs: 5000,
|
||||||
|
jitter: 0.1,
|
||||||
|
};
|
||||||
|
|
||||||
|
type RetryableError = Error & { status?: number };
|
||||||
|
|
||||||
|
function isRetryableBatchError(err: unknown): boolean {
|
||||||
|
const status =
|
||||||
|
typeof (err as RetryableError)?.status === "number" ? (err as RetryableError).status : undefined;
|
||||||
|
if (typeof status === "number") {
|
||||||
|
return status === 429 || status >= 500;
|
||||||
|
}
|
||||||
|
const message = err instanceof Error ? err.message : String(err);
|
||||||
|
return /timeout|timed out|ECONNRESET|ECONNREFUSED|EHOSTUNREACH|ENOTFOUND|EAI_AGAIN|network|fetch failed|upstream connect/i.test(
|
||||||
|
message,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatRetryError(err: unknown): string {
|
||||||
|
return err instanceof Error ? err.message : String(err);
|
||||||
|
}
|
||||||
|
|
||||||
function getOpenAiBaseUrl(openAi: OpenAiEmbeddingClient): string {
|
function getOpenAiBaseUrl(openAi: OpenAiEmbeddingClient): string {
|
||||||
return openAi.baseUrl?.replace(/\/$/, "") ?? "";
|
return openAi.baseUrl?.replace(/\/$/, "") ?? "";
|
||||||
@@ -63,10 +88,43 @@ function splitOpenAiBatchRequests(requests: OpenAiBatchRequest[]): OpenAiBatchRe
|
|||||||
return groups;
|
return groups;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function fetchOpenAiWithRetry(params: {
|
||||||
|
openAi: OpenAiEmbeddingClient;
|
||||||
|
url: string;
|
||||||
|
init?: RequestInit;
|
||||||
|
label: string;
|
||||||
|
debug?: (message: string, data?: Record<string, unknown>) => void;
|
||||||
|
}): Promise<Response> {
|
||||||
|
return await retryAsync(
|
||||||
|
async () => {
|
||||||
|
const res = await fetch(params.url, params.init);
|
||||||
|
if (!res.ok) {
|
||||||
|
const text = await res.text();
|
||||||
|
const err = new Error(`openai batch ${params.label} failed: ${res.status} ${text}`);
|
||||||
|
(err as RetryableError).status = res.status;
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
},
|
||||||
|
{
|
||||||
|
...OPENAI_BATCH_RETRY,
|
||||||
|
label: params.label,
|
||||||
|
shouldRetry: isRetryableBatchError,
|
||||||
|
onRetry: (info) => {
|
||||||
|
params.debug?.(
|
||||||
|
`openai batch ${params.label} retry ${info.attempt}/${info.maxAttempts} in ${info.delayMs}ms`,
|
||||||
|
{ error: formatRetryError(info.err) },
|
||||||
|
);
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
async function submitOpenAiBatch(params: {
|
async function submitOpenAiBatch(params: {
|
||||||
openAi: OpenAiEmbeddingClient;
|
openAi: OpenAiEmbeddingClient;
|
||||||
requests: OpenAiBatchRequest[];
|
requests: OpenAiBatchRequest[];
|
||||||
agentId: string;
|
agentId: string;
|
||||||
|
debug?: (message: string, data?: Record<string, unknown>) => void;
|
||||||
}): Promise<OpenAiBatchStatus> {
|
}): Promise<OpenAiBatchStatus> {
|
||||||
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
||||||
const jsonl = params.requests.map((request) => JSON.stringify(request)).join("\n");
|
const jsonl = params.requests.map((request) => JSON.stringify(request)).join("\n");
|
||||||
@@ -78,67 +136,73 @@ async function submitOpenAiBatch(params: {
|
|||||||
`memory-embeddings.${hashText(String(Date.now()))}.jsonl`,
|
`memory-embeddings.${hashText(String(Date.now()))}.jsonl`,
|
||||||
);
|
);
|
||||||
|
|
||||||
const fileRes = await fetch(`${baseUrl}/files`, {
|
const fileRes = await fetchOpenAiWithRetry({
|
||||||
method: "POST",
|
openAi: params.openAi,
|
||||||
headers: getOpenAiHeaders(params.openAi, { json: false }),
|
url: `${baseUrl}/files`,
|
||||||
body: form,
|
init: {
|
||||||
|
method: "POST",
|
||||||
|
headers: getOpenAiHeaders(params.openAi, { json: false }),
|
||||||
|
body: form,
|
||||||
|
},
|
||||||
|
label: "file upload",
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
if (!fileRes.ok) {
|
|
||||||
const text = await fileRes.text();
|
|
||||||
throw new Error(`openai batch file upload failed: ${fileRes.status} ${text}`);
|
|
||||||
}
|
|
||||||
const filePayload = (await fileRes.json()) as { id?: string };
|
const filePayload = (await fileRes.json()) as { id?: string };
|
||||||
if (!filePayload.id) {
|
if (!filePayload.id) {
|
||||||
throw new Error("openai batch file upload failed: missing file id");
|
throw new Error("openai batch file upload failed: missing file id");
|
||||||
}
|
}
|
||||||
|
|
||||||
const batchRes = await fetch(`${baseUrl}/batches`, {
|
const batchRes = await fetchOpenAiWithRetry({
|
||||||
method: "POST",
|
openAi: params.openAi,
|
||||||
headers: getOpenAiHeaders(params.openAi, { json: true }),
|
url: `${baseUrl}/batches`,
|
||||||
body: JSON.stringify({
|
init: {
|
||||||
input_file_id: filePayload.id,
|
method: "POST",
|
||||||
endpoint: OPENAI_BATCH_ENDPOINT,
|
headers: getOpenAiHeaders(params.openAi, { json: true }),
|
||||||
completion_window: OPENAI_BATCH_COMPLETION_WINDOW,
|
body: JSON.stringify({
|
||||||
metadata: {
|
input_file_id: filePayload.id,
|
||||||
source: "clawdbot-memory",
|
endpoint: OPENAI_BATCH_ENDPOINT,
|
||||||
agent: params.agentId,
|
completion_window: OPENAI_BATCH_COMPLETION_WINDOW,
|
||||||
},
|
metadata: {
|
||||||
}),
|
source: "clawdbot-memory",
|
||||||
|
agent: params.agentId,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
label: "create",
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
if (!batchRes.ok) {
|
|
||||||
const text = await batchRes.text();
|
|
||||||
throw new Error(`openai batch create failed: ${batchRes.status} ${text}`);
|
|
||||||
}
|
|
||||||
return (await batchRes.json()) as OpenAiBatchStatus;
|
return (await batchRes.json()) as OpenAiBatchStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchOpenAiBatchStatus(params: {
|
async function fetchOpenAiBatchStatus(params: {
|
||||||
openAi: OpenAiEmbeddingClient;
|
openAi: OpenAiEmbeddingClient;
|
||||||
batchId: string;
|
batchId: string;
|
||||||
|
debug?: (message: string, data?: Record<string, unknown>) => void;
|
||||||
}): Promise<OpenAiBatchStatus> {
|
}): Promise<OpenAiBatchStatus> {
|
||||||
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
||||||
const res = await fetch(`${baseUrl}/batches/${params.batchId}`, {
|
const res = await fetchOpenAiWithRetry({
|
||||||
headers: getOpenAiHeaders(params.openAi, { json: true }),
|
openAi: params.openAi,
|
||||||
|
url: `${baseUrl}/batches/${params.batchId}`,
|
||||||
|
init: { headers: getOpenAiHeaders(params.openAi, { json: true }) },
|
||||||
|
label: "status",
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
|
||||||
const text = await res.text();
|
|
||||||
throw new Error(`openai batch status failed: ${res.status} ${text}`);
|
|
||||||
}
|
|
||||||
return (await res.json()) as OpenAiBatchStatus;
|
return (await res.json()) as OpenAiBatchStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchOpenAiFileContent(params: {
|
async function fetchOpenAiFileContent(params: {
|
||||||
openAi: OpenAiEmbeddingClient;
|
openAi: OpenAiEmbeddingClient;
|
||||||
fileId: string;
|
fileId: string;
|
||||||
|
debug?: (message: string, data?: Record<string, unknown>) => void;
|
||||||
}): Promise<string> {
|
}): Promise<string> {
|
||||||
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
const baseUrl = getOpenAiBaseUrl(params.openAi);
|
||||||
const res = await fetch(`${baseUrl}/files/${params.fileId}/content`, {
|
const res = await fetchOpenAiWithRetry({
|
||||||
headers: getOpenAiHeaders(params.openAi, { json: true }),
|
openAi: params.openAi,
|
||||||
|
url: `${baseUrl}/files/${params.fileId}/content`,
|
||||||
|
init: { headers: getOpenAiHeaders(params.openAi, { json: true }) },
|
||||||
|
label: "file content",
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
|
||||||
const text = await res.text();
|
|
||||||
throw new Error(`openai batch file content failed: ${res.status} ${text}`);
|
|
||||||
}
|
|
||||||
return await res.text();
|
return await res.text();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,11 +218,13 @@ function parseOpenAiBatchOutput(text: string): OpenAiBatchOutputLine[] {
|
|||||||
async function readOpenAiBatchError(params: {
|
async function readOpenAiBatchError(params: {
|
||||||
openAi: OpenAiEmbeddingClient;
|
openAi: OpenAiEmbeddingClient;
|
||||||
errorFileId: string;
|
errorFileId: string;
|
||||||
|
debug?: (message: string, data?: Record<string, unknown>) => void;
|
||||||
}): Promise<string | undefined> {
|
}): Promise<string | undefined> {
|
||||||
try {
|
try {
|
||||||
const content = await fetchOpenAiFileContent({
|
const content = await fetchOpenAiFileContent({
|
||||||
openAi: params.openAi,
|
openAi: params.openAi,
|
||||||
fileId: params.errorFileId,
|
fileId: params.errorFileId,
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
const lines = parseOpenAiBatchOutput(content);
|
const lines = parseOpenAiBatchOutput(content);
|
||||||
const first = lines.find((line) => line.error?.message || line.response?.body?.error);
|
const first = lines.find((line) => line.error?.message || line.response?.body?.error);
|
||||||
@@ -191,6 +257,7 @@ async function waitForOpenAiBatch(params: {
|
|||||||
(await fetchOpenAiBatchStatus({
|
(await fetchOpenAiBatchStatus({
|
||||||
openAi: params.openAi,
|
openAi: params.openAi,
|
||||||
batchId: params.batchId,
|
batchId: params.batchId,
|
||||||
|
debug: params.debug,
|
||||||
}));
|
}));
|
||||||
const state = status.status ?? "unknown";
|
const state = status.status ?? "unknown";
|
||||||
if (state === "completed") {
|
if (state === "completed") {
|
||||||
@@ -204,7 +271,11 @@ async function waitForOpenAiBatch(params: {
|
|||||||
}
|
}
|
||||||
if (["failed", "expired", "cancelled", "canceled"].includes(state)) {
|
if (["failed", "expired", "cancelled", "canceled"].includes(state)) {
|
||||||
const detail = status.error_file_id
|
const detail = status.error_file_id
|
||||||
? await readOpenAiBatchError({ openAi: params.openAi, errorFileId: status.error_file_id })
|
? await readOpenAiBatchError({
|
||||||
|
openAi: params.openAi,
|
||||||
|
errorFileId: status.error_file_id,
|
||||||
|
debug: params.debug,
|
||||||
|
})
|
||||||
: undefined;
|
: undefined;
|
||||||
const suffix = detail ? `: ${detail}` : "";
|
const suffix = detail ? `: ${detail}` : "";
|
||||||
throw new Error(`openai batch ${params.batchId} ${state}${suffix}`);
|
throw new Error(`openai batch ${params.batchId} ${state}${suffix}`);
|
||||||
@@ -267,6 +338,7 @@ export async function runOpenAiEmbeddingBatches(params: {
|
|||||||
openAi: params.openAi,
|
openAi: params.openAi,
|
||||||
requests: group,
|
requests: group,
|
||||||
agentId: params.agentId,
|
agentId: params.agentId,
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
if (!batchInfo.id) {
|
if (!batchInfo.id) {
|
||||||
throw new Error("openai batch create failed: missing batch id");
|
throw new Error("openai batch create failed: missing batch id");
|
||||||
@@ -308,6 +380,7 @@ export async function runOpenAiEmbeddingBatches(params: {
|
|||||||
const content = await fetchOpenAiFileContent({
|
const content = await fetchOpenAiFileContent({
|
||||||
openAi: params.openAi,
|
openAi: params.openAi,
|
||||||
fileId: completed.outputFileId,
|
fileId: completed.outputFileId,
|
||||||
|
debug: params.debug,
|
||||||
});
|
});
|
||||||
const outputLines = parseOpenAiBatchOutput(content);
|
const outputLines = parseOpenAiBatchOutput(content);
|
||||||
const errors: string[] = [];
|
const errors: string[] = [];
|
||||||
|
|||||||
Reference in New Issue
Block a user