refactor: simplify MEDIA parsing, drop invalid lines, keep valid tokens

This commit is contained in:
Peter Steinberger
2025-11-25 06:17:48 +01:00
parent 9f952f3ab8
commit d925d9849c

View File

@@ -1,12 +1,21 @@
// Shared helpers for parsing MEDIA tokens from command/stdout text. // Shared helpers for parsing MEDIA tokens from command/stdout text.
export const MEDIA_LINE_RE = /\bMEDIA:/i;
// Allow optional wrapping backticks and punctuation after the token; capture the core token. // Allow optional wrapping backticks and punctuation after the token; capture the core token.
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\s`]+)`?/i; export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\s`]+)`?/i;
export function normalizeMediaSource(src: string) { export function normalizeMediaSource(src: string) {
if (src.startsWith("file://")) return src.replace("file://", ""); return src.startsWith("file://") ? src.replace("file://", "") : src;
return src; }
/**
 * Strips wrapper characters from a raw captured token.
 *
 * Removes any leading run of backticks, quotes, or opening brackets
 * (`[`, `{`, `(`), then any trailing run of backticks, quotes, backslashes,
 * closing brackets (`}`, `)`, `]`), or commas. Characters in the middle of
 * the token are left untouched.
 *
 * @param raw - The token text as captured from the output.
 * @returns The token with leading/trailing wrapper characters removed.
 */
function cleanCandidate(raw: string) {
  // First pass: drop opening wrappers anchored at the start of the token.
  const withoutLeading = raw.replace(/^[`"'[{(]+/, "");
  // Second pass: drop closing wrappers and trailing commas anchored at the end.
  const withoutTrailing = withoutLeading.replace(/[`"'\\})\],]+$/, "");
  return withoutTrailing;
}
/**
 * Decides whether a candidate string is acceptable as a media source.
 *
 * Rejects empty strings, strings longer than 1024 characters, and strings
 * containing any whitespace. Otherwise accepts only an `http://` or
 * `https://` URL (scheme matched case-insensitively) or a string that
 * starts with "/" or "./".
 *
 * @param candidate - The candidate source string to check.
 * @returns `true` when the candidate passes every check, otherwise `false`.
 */
function isValidMedia(candidate: string) {
// An empty candidate can never be a media source.
if (!candidate) return false;
// Length cap: anything over 1024 characters is rejected.
if (candidate.length > 1024) return false;
// A media source must be a single token with no whitespace in it.
if (/\s/.test(candidate)) return false;
// Accept http(s) URLs, or paths beginning with "/" or "./".
return /^https?:\/\//i.test(candidate) || candidate.startsWith("/") || candidate.startsWith("./");
} }
export function splitMediaFromOutput(raw: string): { export function splitMediaFromOutput(raw: string): {
@@ -14,52 +23,28 @@ export function splitMediaFromOutput(raw: string): {
mediaUrl?: string; mediaUrl?: string;
} { } {
const trimmedRaw = raw.trim(); const trimmedRaw = raw.trim();
let text = trimmedRaw; const match = MEDIA_TOKEN_RE.exec(trimmedRaw);
let mediaUrl: string | undefined; if (!match?.[1]) return { text: trimmedRaw };
const globalMatch = trimmedRaw.match(MEDIA_TOKEN_RE); const candidate = normalizeMediaSource(cleanCandidate(match[1]));
let mediaLine = trimmedRaw.split("\n").find((line) => MEDIA_LINE_RE.test(line)); const mediaUrl = isValidMedia(candidate) ? candidate : undefined;
let mediaMatch = mediaLine?.match(MEDIA_TOKEN_RE) ?? globalMatch;
if (!mediaMatch) {
return { text: trimmedRaw };
}
if (!mediaLine && mediaMatch) {
mediaLine = mediaMatch[0];
}
let isValidMedia = false; const cleanedText =
if (mediaMatch?.[1]) { mediaUrl
const cleaned = mediaMatch[1] ? trimmedRaw
.replace(/^[`"'[{(]+/, "") .replace(match[0], "")
.replace(/[`"'\\})\],]+$/, ""); .replace(/[ \t]+\n/g, "\n")
const candidate = normalizeMediaSource(cleaned); .replace(/[ \t]{2,}/g, " ")
const looksLikeUrl = /^https?:\/\//i.test(candidate); .replace(/\n{2,}/g, "\n")
const looksLikePath = candidate.startsWith("/") || candidate.startsWith("./"); .trim()
const hasWhitespace = /\s/.test(candidate); : trimmedRaw
isValidMedia = .split("\n")
!hasWhitespace && candidate.length <= 1024 && (looksLikeUrl || looksLikePath); .filter((line) => !MEDIA_TOKEN_RE.test(line))
if (isValidMedia) { .join("\n")
mediaUrl = candidate; .replace(/[ \t]+\n/g, "\n")
} .replace(/[ \t]{2,}/g, " ")
} .replace(/\n{2,}/g, "\n")
.trim();
if (isValidMedia && mediaMatch?.[0]) { return mediaUrl ? { text: cleanedText, mediaUrl } : { text: cleanedText };
text = trimmedRaw
.replace(mediaMatch[0], "")
.replace(/[ \t]{2,}/g, " ")
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{2,}/g, "\n")
.trim();
} else {
text = trimmedRaw
.split("\n")
.filter((line) => line !== mediaLine)
.join("\n")
.replace(/[ \t]{2,}/g, " ")
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{2,}/g, "\n")
.trim();
}
return { text, mediaUrl };
} }