Files
ai-write/src/utils/textDiff.js
empty 1a1d7dabdf feat: 添加文稿管理、素材库、设置页面及对照检查重写功能
- 新增 DocumentsPanel.vue 文稿管理页面
- 新增 MaterialsPanel.vue 素材库管理页面
- 新增 SettingsPanel.vue 设置页面
- 新增 DocumentSelectorModal.vue 文稿选择弹窗
- 新增 MaterialSelectorModal.vue 素材选择弹窗
- 集成 SQLite 数据库持久化 (sql.js)
- 对照检查页面支持从文稿库选取内容
- 对照检查页面新增一键重写及差异对比功能
- 修复对照检查页面布局问题
- MainContent 支持文稿编辑功能
2026-01-09 00:21:52 +08:00

287 lines
8.3 KiB
JavaScript

/**
* 文本差异计算工具
* 用于对比原文和重写内容,生成差异片段
*/
/**
 * Split text into sentences and record where each sentence sits in the input.
 *
 * A sentence is a run of non-terminator characters followed by any run of
 * terminator characters (the terminators are the characters listed in the
 * pattern inside the function, which include the line feed). Surrounding
 * whitespace is stripped from the reported text, and `start`/`end` describe
 * exactly that stripped text, so `text.slice(start, end) === sentence.text`
 * holds for every entry. Chunks that are empty after trimming are skipped.
 *
 * @param {string} text - Input text.
 * @returns {Array<{text: string, start: number, end: number}>} Sentences in
 *   document order; `start` is inclusive and `end` is exclusive.
 */
export const splitIntoSentencesWithPosition = (text) => {
  if (!text) return []
  // Coerce so that truthy non-string input keeps being handled as its string form.
  const source = String(text)
  const sentences = []
  for (const match of source.matchAll(/[^。!?;\n]+[。!?;\n]*/g)) {
    const raw = match[0]
    const sentence = raw.trim()
    if (!sentence) continue
    // Skip the leading whitespace that trim() removed, so the offsets point at
    // the reported text and not at the whitespace/newlines surrounding it.
    const start = match.index + (raw.length - raw.trimStart().length)
    sentences.push({
      text: sentence,
      start,
      end: start + sentence.length
    })
  }
  return sentences
}
/**
 * Split text into sentences, returning only the sentence strings.
 *
 * Convenience wrapper around splitIntoSentencesWithPosition that drops the
 * position information.
 *
 * @param {string} text - Input text.
 * @returns {string[]} Sentence strings in document order.
 */
export const splitIntoSentences = (text) => {
  const located = splitIntoSentencesWithPosition(text)
  return located.map(({ text: sentenceText }) => sentenceText)
}
/**
 * Estimate how similar two strings are.
 *
 * This is NOT an edit distance: it is the Jaccard index of the two strings'
 * character sets (distinct shared characters divided by distinct characters
 * overall). Character order and repetition are ignored, so anagrams score 1.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} Similarity in the range 0-1. Two empty/missing inputs
 *   score 1; exactly one empty/missing input scores 0.
 */
export const similarity = (s1, s2) => {
  if (!s1 && !s2) return 1
  if (!s1 || !s2) return 0
  const chars1 = new Set(s1)
  const chars2 = new Set(s2)
  let shared = 0
  for (const ch of chars1) {
    if (chars2.has(ch)) shared += 1
  }
  // |A ∪ B| = |A| + |B| - |A ∩ B|; both sets are non-empty here, so this is > 0.
  return shared / (chars1.size + chars2.size - shared)
}
/**
 * Compute a sentence-level diff between an original text and its rewrite.
 *
 * Both texts are split into sentences and walked in parallel with a greedy
 * matcher:
 *   - similarity > 0.8  -> 'unchanged'
 *   - similarity > 0.4  -> 'modified'
 *   - otherwise look up to two sentences ahead in the rewrite, then in the
 *     original, for a partner scoring > 0.6; the skipped sentences become
 *     'added' / 'removed'. With no partner the pair is reported as 'modified'.
 * Sentences left over once one side is exhausted become 'added' or 'removed'.
 *
 * Segments tied to the original carry `originalStart`/`originalEnd` (character
 * offsets of the original sentence). 'added' segments carry `insertAfterIdx`
 * instead, which despite its name is a character offset into the original
 * text: the start of the current original sentence, or the end of the last
 * original sentence for trailing additions (0 when the original is empty).
 *
 * @param {string} original - Original text.
 * @param {string} rewritten - Rewritten text.
 * @returns {Array<{type: 'unchanged'|'modified'|'added'|'removed', original: string, rewritten: string, idx: number, originalStart?: number, originalEnd?: number, insertAfterIdx?: number}>}
 *   Segments in emission order; `idx` is a running counter starting at 0.
 */
export const computeDiff = (original, rewritten) => {
  const sourceItems = splitIntoSentencesWithPosition(original)
  const targetItems = splitIntoSentencesWithPosition(rewritten)
  const segments = []
  let nextIdx = 0

  // Emit a segment that is anchored to a sentence of the original text.
  const pushAnchored = (type, item, rewrittenText) => {
    segments.push({
      type,
      original: item.text,
      rewritten: rewrittenText,
      idx: nextIdx++,
      originalStart: item.start,
      originalEnd: item.end
    })
  }

  // Emit a segment that exists only in the rewritten text.
  const pushAdded = (rewrittenText, offset) => {
    segments.push({
      type: 'added',
      original: '',
      rewritten: rewrittenText,
      idx: nextIdx++,
      insertAfterIdx: offset
    })
  }

  // Index of the first of the next two items accepted by `matches`, or -1.
  const findAhead = (items, current, matches) => {
    const limit = Math.min(current + 3, items.length)
    for (let i = current + 1; i < limit; i++) {
      if (matches(items[i].text)) return i
    }
    return -1
  }

  // Trailing additions are placed after the last original sentence.
  const tailOffset = sourceItems.length > 0 ? sourceItems[sourceItems.length - 1].end : 0

  let s = 0
  let t = 0
  while (s < sourceItems.length || t < targetItems.length) {
    if (s >= sourceItems.length) {
      // Original exhausted: everything left in the rewrite is new.
      pushAdded(targetItems[t].text, tailOffset)
      t += 1
      continue
    }
    if (t >= targetItems.length) {
      // Rewrite exhausted: everything left in the original was removed.
      pushAnchored('removed', sourceItems[s], '')
      s += 1
      continue
    }

    const src = sourceItems[s]
    const tgt = targetItems[t]
    const score = similarity(src.text, tgt.text)
    if (score > 0.4) {
      pushAnchored(score > 0.8 ? 'unchanged' : 'modified', src, tgt.text)
      s += 1
      t += 1
      continue
    }

    // Low similarity: first look ahead in the rewrite for the original sentence.
    const targetHit = findAhead(targetItems, t, (candidate) => similarity(src.text, candidate) > 0.6)
    if (targetHit !== -1) {
      // Everything skipped in the rewrite was inserted before the current original sentence.
      for (let j = t; j < targetHit; j++) {
        pushAdded(targetItems[j].text, src.start)
      }
      t = targetHit
      continue
    }

    // Then look ahead in the original for the rewritten sentence.
    const sourceHit = findAhead(sourceItems, s, (candidate) => similarity(candidate, tgt.text) > 0.6)
    if (sourceHit !== -1) {
      // Everything skipped in the original was removed.
      for (let j = s; j < sourceHit; j++) {
        pushAnchored('removed', sourceItems[j], '')
      }
      s = sourceHit
      continue
    }

    // No partner on either side: report the pair as a modification.
    pushAnchored('modified', src, tgt.text)
    s += 1
    t += 1
  }
  return segments
}
/**
 * Apply the accepted diff segments to the original text.
 *
 * Only segments whose `idx` is in `acceptedChanges` are applied:
 *   - 'modified' replaces original[originalStart, originalEnd) with `rewritten`
 *   - 'removed'  deletes original[originalStart, originalEnd)
 *   - 'added'    inserts `rewritten` at the character offset `insertAfterIdx`
 * Segments lacking the position fields they need are ignored.
 *
 * The result is assembled in one left-to-right pass over the original, so
 * every offset is interpreted against the untouched original text. When
 * several edits share an offset, insertions are emitted first in segment
 * order, followed by the replace/delete starting there; this keeps multiple
 * insertions in order and prevents an insertion from shifting the range of a
 * replacement that begins at the same offset.
 *
 * @param {string} original - Original text.
 * @param {Array} diffSegments - Diff segments carrying position information.
 * @param {Set<number>|Iterable<number>} acceptedChanges - `idx` values of the
 *   accepted segments; anything with a `has` method is used as is, any other
 *   iterable is copied into a Set.
 * @returns {string} Text with the accepted changes applied; `original` itself
 *   when nothing applies.
 */
export const applySelectedChanges = (original, diffSegments, acceptedChanges) => {
  const accepted =
    acceptedChanges && typeof acceptedChanges.has === 'function'
      ? acceptedChanges
      : new Set(acceptedChanges ?? [])

  // Normalise every accepted segment to "replace [start, end) with content";
  // an insertion is the zero-length case.
  const edits = []
  for (const segment of diffSegments ?? []) {
    if (!accepted.has(segment.idx)) continue
    const { type, originalStart, originalEnd, insertAfterIdx, rewritten } = segment
    const hasSpan = originalStart !== undefined && originalEnd !== undefined
    if (type === 'modified' && hasSpan) {
      edits.push({ start: originalStart, end: originalEnd, content: rewritten ?? '', isRange: true })
    } else if (type === 'removed' && hasSpan) {
      edits.push({ start: originalStart, end: originalEnd, content: '', isRange: true })
    } else if (type === 'added' && insertAfterIdx !== undefined) {
      edits.push({ start: insertAfterIdx, end: insertAfterIdx, content: rewritten ?? '', isRange: false })
    }
  }
  if (edits.length === 0) return original

  // Ascending by offset; at equal offsets insertions precede range edits.
  // Array.prototype.sort is stable, so remaining ties keep segment order.
  edits.sort((a, b) => a.start - b.start || Number(a.isRange) - Number(b.isRange))

  let cursor = 0
  let result = ''
  for (const edit of edits) {
    // Clamp so an out-of-order or overlapping edit can never re-emit text
    // that has already been consumed.
    const from = Math.max(edit.start, cursor)
    result += original.slice(cursor, from) + edit.content
    cursor = Math.max(edit.end, from)
  }
  return result + original.slice(cursor)
}
/**
 * Summarise a list of diff segments by type.
 *
 * @param {Array} diffSegments - Diff segments; only each segment's `type` is read.
 * @returns {{total: number, modified: number, added: number, removed: number, unchanged: number}}
 *   `total` is the number of segments; the other fields count segments whose
 *   `type` equals the field name. Segments of any other type only count
 *   towards `total`.
 */
export const getDiffStats = (diffSegments) => {
  const stats = {
    total: diffSegments.length,
    modified: 0,
    added: 0,
    removed: 0,
    unchanged: 0
  }
  diffSegments.forEach((segment) => {
    switch (segment.type) {
      case 'modified':
      case 'added':
      case 'removed':
      case 'unchanged':
        stats[segment.type] += 1
        break
      default:
        break
    }
  })
  return stats
}