/**
 * Text diff utilities.
 *
 * Compares an original text with a rewritten version sentence by sentence,
 * produces diff segments, and applies a user-selected subset of those
 * segments back onto the original text.
 */

// Similarity strictly above this value: the sentence pair is treated as unchanged.
const UNCHANGED_THRESHOLD = 0.8
// Similarity strictly above this value (and not unchanged): treated as modified.
const MODIFIED_THRESHOLD = 0.4
// Similarity required for a look-ahead candidate to count as a re-sync point.
const LOOKAHEAD_MATCH_THRESHOLD = 0.6
// Exclusive upper offset of the look-ahead scan: the next two sentences are inspected.
const LOOKAHEAD_LIMIT = 3

/**
 * Splits text into sentences and records where each one sits in the input.
 *
 * A sentence is a run of non-terminator characters followed by any number of
 * terminators. Terminators are `。`, `!`, `?`, `;`, their full-width forms
 * (U+FF01, U+FF1F, U+FF1B) and the newline character.
 *
 * `text` is the trimmed sentence, whereas `start`/`end` delimit the raw,
 * untrimmed match (including surrounding whitespace and trailing newlines).
 * Runs that are empty after trimming are skipped.
 *
 * @param {string} text - Input text
 * @returns {Array<{text: string, start: number, end: number}>} Sentences with
 *   their half-open `[start, end)` character range in the input
 */
export const splitIntoSentencesWithPosition = (text) => {
  if (!text) return []

  // Full-width terminators are written as escapes so they survive text normalisation.
  const sentencePattern = /[^。!?;\uFF01\uFF1F\uFF1B\n]+[。!?;\uFF01\uFF1F\uFF1B\n]*/g
  const sentences = []
  for (const match of String(text).matchAll(sentencePattern)) {
    const raw = match[0]
    const sentence = raw.trim()
    if (sentence) {
      sentences.push({
        text: sentence,
        start: match.index,
        end: match.index + raw.length
      })
    }
  }
  return sentences
}

/**
 * Splits text into sentences (simple variant without position information).
 *
 * @param {string} text - Input text
 * @returns {string[]} Trimmed sentences
 */
export const splitIntoSentences = (text) => {
  return splitIntoSentencesWithPosition(text).map((sentence) => sentence.text)
}

/**
 * Computes a similarity score for two strings as the Jaccard index of their
 * character sets: |shared characters| / |all distinct characters|.
 *
 * This is a cheap approximation, not an edit distance: character order and
 * repetition are ignored, so anagrams score 1.
 *
 * @param {string} s1 - First string
 * @param {string} s2 - Second string
 * @returns {number} Similarity in the range 0-1 (1 when both are empty,
 *   0 when exactly one is empty)
 */
export const similarity = (s1, s2) => {
  if (!s1 && !s2) return 1
  if (!s1 || !s2) return 0

  const chars1 = new Set(s1)
  const chars2 = new Set(s2)
  let shared = 0
  for (const ch of chars1) {
    if (chars2.has(ch)) shared++
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = chars1.size + chars2.size - shared
  return shared / unionSize
}

/**
 * Returns the index of the first sentence after `fromIdx` (within the
 * look-ahead window) that is similar enough to `target`, or -1 if none is.
 */
const findLookaheadMatch = (sentences, fromIdx, target) => {
  const limit = Math.min(fromIdx + LOOKAHEAD_LIMIT, sentences.length)
  for (let i = fromIdx + 1; i < limit; i++) {
    if (similarity(target, sentences[i].text) > LOOKAHEAD_MATCH_THRESHOLD) {
      return i
    }
  }
  return -1
}

/**
 * Computes a sentence-level diff between two texts using a greedy matcher.
 *
 * Both texts are split into sentences and walked in parallel. A pair is
 * `unchanged` above 0.8 similarity and `modified` above 0.4. Below that, the
 * next two rewritten sentences are searched for a match of the current
 * original sentence (skipped ones become `added`), then the next two original
 * sentences for a match of the current rewritten sentence (skipped ones become
 * `removed`). If neither search succeeds the pair is reported as `modified`.
 *
 * Each segment's `idx` equals its index in the returned array.
 * `unchanged`/`modified`/`removed` segments carry `originalStart`/`originalEnd`
 * (raw sentence range in `original`). `added` segments carry `insertAfterIdx`,
 * which - despite its name - is a character offset into `original` at which
 * the new sentence should be inserted.
 *
 * @param {string} original - Original text
 * @param {string} rewritten - Rewritten text
 * @returns {Array<{type: 'unchanged'|'modified'|'added'|'removed', original: string, rewritten: string, idx: number, originalStart?: number, originalEnd?: number, insertAfterIdx?: number}>}
 */
export const computeDiff = (original, rewritten) => {
  const originalSentences = splitIntoSentencesWithPosition(original)
  const rewrittenSentences = splitIntoSentencesWithPosition(rewritten)
  const diff = []

  // Segment builders; `diff.length` is read before the push, so it is the new segment's index.
  const pushPaired = (type, oItem, rItem) => {
    diff.push({
      type,
      original: oItem.text,
      rewritten: rItem.text,
      idx: diff.length,
      originalStart: oItem.start,
      originalEnd: oItem.end
    })
  }
  const pushRemoved = (oItem) => {
    diff.push({
      type: 'removed',
      original: oItem.text,
      rewritten: '',
      idx: diff.length,
      originalStart: oItem.start,
      originalEnd: oItem.end
    })
  }
  const pushAdded = (rItem, position) => {
    diff.push({
      type: 'added',
      original: '',
      rewritten: rItem.text,
      idx: diff.length,
      insertAfterIdx: position
    })
  }

  // Trailing additions are inserted after the last original sentence.
  const endOfOriginal = originalSentences.length > 0
    ? originalSentences[originalSentences.length - 1].end
    : 0

  let oIdx = 0
  let rIdx = 0
  while (oIdx < originalSentences.length || rIdx < rewrittenSentences.length) {
    const oItem = originalSentences[oIdx]
    const rItem = rewrittenSentences[rIdx]

    if (oIdx >= originalSentences.length) {
      // Original exhausted: everything left in the rewrite is new.
      pushAdded(rItem, endOfOriginal)
      rIdx++
      continue
    }

    if (rIdx >= rewrittenSentences.length) {
      // Rewrite exhausted: everything left in the original was removed.
      pushRemoved(oItem)
      oIdx++
      continue
    }

    const sim = similarity(oItem.text, rItem.text)
    if (sim > UNCHANGED_THRESHOLD) {
      pushPaired('unchanged', oItem, rItem)
      oIdx++
      rIdx++
      continue
    }
    if (sim > MODIFIED_THRESHOLD) {
      pushPaired('modified', oItem, rItem)
      oIdx++
      rIdx++
      continue
    }

    // Low similarity: look ahead in the rewrite for the current original sentence.
    const rMatch = findLookaheadMatch(rewrittenSentences, rIdx, oItem.text)
    if (rMatch !== -1) {
      // Everything skipped in the rewrite was inserted before the current original sentence.
      for (let j = rIdx; j < rMatch; j++) {
        pushAdded(rewrittenSentences[j], oItem.start)
      }
      rIdx = rMatch
      continue
    }

    // Otherwise look ahead in the original for the current rewritten sentence.
    const oMatch = findLookaheadMatch(originalSentences, oIdx, rItem.text)
    if (oMatch !== -1) {
      // Everything skipped in the original was removed.
      for (let j = oIdx; j < oMatch; j++) {
        pushRemoved(originalSentences[j])
      }
      oIdx = oMatch
      continue
    }

    // No re-sync point found: treat the pair as a modification.
    pushPaired('modified', oItem, rItem)
    oIdx++
    rIdx++
  }

  return diff
}

/**
 * Wraps `replacement` in the leading/trailing whitespace of `span`, so that
 * replacing a raw sentence range does not swallow line breaks or indentation.
 */
const keepSurroundingWhitespace = (span, replacement) => {
  if (span.trim() === '') return replacement
  const leading = span.slice(0, span.length - span.trimStart().length)
  const trailing = span.slice(span.trimEnd().length)
  return `${leading}${replacement}${trailing}`
}

/**
 * Applies the accepted diff segments to the original text.
 *
 * - accepted `modified` segments replace `[originalStart, originalEnd)` with
 *   the rewritten sentence, keeping the whitespace/newlines that surrounded
 *   the original sentence inside that range;
 * - accepted `removed` segments delete `[originalStart, originalEnd)`;
 * - accepted `added` segments insert the rewritten sentence at the character
 *   offset `insertAfterIdx`.
 *
 * Segments that are not accepted, `unchanged` segments, and segments missing
 * their position fields are ignored. Several insertions at the same offset
 * keep the order they have in `diffSegments`, and an insertion at the start
 * of a replaced/deleted range is placed before it.
 *
 * The original text is returned unchanged when nothing is accepted, when there
 * are no applicable operations, or when the result would be blank.
 *
 * @param {string} original - Original text
 * @param {Array} diffSegments - Diff segments with position information
 * @param {Set} acceptedChanges - Set of accepted segment `idx` values
 * @returns {string} Text with the accepted changes applied
 */
export const applySelectedChanges = (original, diffSegments, acceptedChanges) => {
  // Nothing selected: return the original untouched.
  if (!acceptedChanges || acceptedChanges.size === 0) {
    return original
  }

  // No diff segments: return the original untouched.
  if (!diffSegments || diffSegments.length === 0) {
    return original
  }

  // Normalise every accepted segment to "replace [start, end) with content";
  // an insertion is a zero-width range.
  const operations = []
  for (const segment of diffSegments) {
    if (!acceptedChanges.has(segment.idx)) continue

    const { type, originalStart: start, originalEnd: end } = segment
    const hasRange = start !== undefined && end !== undefined

    if (type === 'modified' && hasRange) {
      operations.push({
        start,
        end,
        content: keepSurroundingWhitespace(original.slice(start, end), segment.rewritten)
      })
    } else if (type === 'removed' && hasRange) {
      operations.push({ start, end, content: '' })
    } else if (type === 'added' && segment.insertAfterIdx !== undefined) {
      const position = segment.insertAfterIdx
      operations.push({ start: position, end: position, content: segment.rewritten })
    }
  }

  if (operations.length === 0) {
    return original
  }

  // Ascending by start; on a tie the zero-width insertion (smaller end) goes first.
  // Array.prototype.sort is stable, so remaining ties keep diff order.
  operations.sort((a, b) => a.start - b.start || a.end - b.end)

  // Rebuild the text in one forward pass over the original, so offsets never
  // shift and one operation can never overwrite another's output.
  let result = ''
  let cursor = 0
  for (const op of operations) {
    const from = Math.max(op.start, cursor) // guard against overlapping ranges
    result += original.slice(cursor, from) + op.content
    cursor = Math.max(op.end, cursor)
  }
  result += original.slice(cursor)

  // Protective measure: never hand back a blank document.
  if (!result || result.trim() === '') {
    console.warn('applySelectedChanges: 结果为空,返回原文')
    return original
  }

  return result
}

/**
 * Counts diff segments by type.
 *
 * @param {Array} diffSegments - Diff segments (a nullish value is treated as empty)
 * @returns {{total: number, modified: number, added: number, removed: number, unchanged: number}}
 */
export const getDiffStats = (diffSegments) => {
  const segments = diffSegments ?? []
  const counts = { modified: 0, added: 0, removed: 0, unchanged: 0 }
  for (const segment of segments) {
    // Own-property check so an unexpected type such as "toString" is ignored.
    if (Object.prototype.hasOwnProperty.call(counts, segment.type)) {
      counts[segment.type]++
    }
  }
  return { total: segments.length, ...counts }
}