- 新增 DocumentsPanel.vue 文稿管理页面 - 新增 MaterialsPanel.vue 素材库管理页面 - 新增 SettingsPanel.vue 设置页面 - 新增 DocumentSelectorModal.vue 文稿选择弹窗 - 新增 MaterialSelectorModal.vue 素材选择弹窗 - 集成 SQLite 数据库持久化 (sql.js) - 对照检查页面支持从文稿库选取内容 - 对照检查页面新增一键重写及差异对比功能 - 修复对照检查页面布局问题 - MainContent 支持文稿编辑功能
287 lines
8.3 KiB
JavaScript
287 lines
8.3 KiB
JavaScript
/**
 * Text diff utilities.
 * Compare an original text with its rewritten version and produce
 * sentence-level diff segments.
 */
|
|
|
|
/**
 * Split a text into sentences and record where each sentence sits in the input.
 *
 * A sentence is a run of characters that contains no delimiter, followed by
 * any number of delimiters. Delimiters are the CJK full stop, the ASCII and
 * full-width forms of `!`, `?` and `;`, and the newline character.
 *
 * The returned offsets describe the trimmed sentence text, so
 * `text.slice(start, end) === sentence.text` always holds. Surrounding
 * whitespace and line breaks stay outside the range and therefore survive
 * when a caller replaces or deletes the range.
 *
 * @param {string} text - Input text
 * @returns {Array<{text: string, start: number, end: number}>} Sentences with
 *   their UTF-16 offsets into `text` (`end` is exclusive); empty for falsy input
 */
export const splitIntoSentencesWithPosition = (text) => {
  if (!text) return []

  // Written with escapes so the delimiter set cannot be altered by Unicode
  // normalization of this file:
  // \u3002 = ideographic full stop, \uFF01 / \uFF1F / \uFF1B = full-width ! ? ;
  // The literal is created per call, so the /g state is never shared.
  const sentencePattern = /[^\u3002!?;\uFF01\uFF1F\uFF1B\n]+[\u3002!?;\uFF01\uFF1F\uFF1B\n]*/g

  const sentences = []
  for (const match of String(text).matchAll(sentencePattern)) {
    const raw = match[0]
    const trimmed = raw.trim()
    // Whitespace-only matches (e.g. a blank, indented line) are not sentences.
    if (!trimmed) continue

    // Skip the leading whitespace that trim() removed so the offsets point
    // at the sentence text itself.
    const leading = raw.length - raw.trimStart().length
    const start = match.index + leading
    sentences.push({
      text: trimmed,
      start,
      end: start + trimmed.length
    })
  }

  return sentences
}
|
|
|
|
/**
 * Split a text into sentences (plain variant without position data).
 *
 * Delegates to `splitIntoSentencesWithPosition` and keeps only the text of
 * each sentence.
 *
 * @param {string} text - Input text
 * @returns {string[]} Sentence texts in document order
 */
export const splitIntoSentences = (text) => {
  const sentenceTexts = []
  for (const sentence of splitIntoSentencesWithPosition(text)) {
    sentenceTexts.push(sentence.text)
  }
  return sentenceTexts
}
|
|
|
|
/**
 * Estimate how similar two strings are from the characters they share.
 *
 * The score is the Jaccard index of the two character sets:
 * (distinct characters present in both) / (distinct characters present in
 * either). It is not an edit distance: character order and repetition are
 * ignored, so strings made of the same characters in a different order
 * score 1.
 *
 * @param {string} s1 - First string
 * @param {string} s2 - Second string
 * @returns {number} Score between 0 and 1; 1 when both inputs are empty,
 *   0 when exactly one of them is empty
 */
export const similarity = (s1, s2) => {
  if (!s1 && !s2) return 1
  if (!s1 || !s2) return 0

  const chars1 = new Set(s1)
  const chars2 = new Set(s2)

  let shared = 0
  for (const ch of chars1) {
    if (chars2.has(ch)) shared++
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|; never zero here because both strings
  // are non-empty at this point.
  const unionSize = chars1.size + chars2.size - shared

  return shared / unionSize
}
|
|
|
|
/**
 * Compute a sentence-level diff between an original text and its rewrite.
 *
 * Both texts are split with `splitIntoSentencesWithPosition` and walked in
 * parallel with a greedy strategy driven by `similarity`:
 *   - score > 0.8  -> 'unchanged' (the pair is treated as the same sentence)
 *   - score > 0.4  -> 'modified'
 *   - otherwise    -> look at the next two sentences of the rewrite for one
 *                     scoring > 0.6 against the current original sentence
 *                     (the skipped rewrite sentences become 'added'); failing
 *                     that, look at the next two original sentences for one
 *                     scoring > 0.6 against the current rewrite sentence (the
 *                     skipped original sentences become 'removed'); failing
 *                     both, the pair is emitted as 'modified'.
 * Leftover rewrite sentences become 'added', leftover original sentences
 * become 'removed'.
 *
 * @param {string} original - Original text
 * @param {string} rewritten - Rewritten text
 * @returns {Array<{type: 'unchanged'|'modified'|'added'|'removed', original: string, rewritten: string, idx: number, originalStart?: number, originalEnd?: number, insertAfterIdx?: number}>}
 *   Segments in emission order; `idx` equals the position in the array.
 *   `originalStart`/`originalEnd` are offsets into `original` and are present
 *   on every type except 'added'. `insertAfterIdx` is present only on 'added'
 *   and is the offset in `original` where the new text belongs.
 */
export const computeDiff = (original, rewritten) => {
  const sourceParts = splitIntoSentencesWithPosition(original)
  const targetParts = splitIntoSentencesWithPosition(rewritten)

  const segments = []

  // Each helper appends exactly one segment, so `segments.length` read
  // before the push is the running segment index.
  const emitAdded = (content, insertAt) => {
    segments.push({
      type: 'added',
      original: '',
      rewritten: content,
      idx: segments.length,
      insertAfterIdx: insertAt
    })
  }

  const emitRemoved = (part) => {
    segments.push({
      type: 'removed',
      original: part.text,
      rewritten: '',
      idx: segments.length,
      originalStart: part.start,
      originalEnd: part.end
    })
  }

  const emitPaired = (type, part, content) => {
    segments.push({
      type,
      original: part.text,
      rewritten: content,
      idx: segments.length,
      originalStart: part.start,
      originalEnd: part.end
    })
  }

  let srcPos = 0
  let tgtPos = 0

  while (srcPos < sourceParts.length || tgtPos < targetParts.length) {
    if (srcPos >= sourceParts.length) {
      // Original exhausted: everything left in the rewrite is new and is
      // anchored at the end of the last original sentence (0 if there is none).
      const tailOffset = sourceParts.length > 0
        ? sourceParts[sourceParts.length - 1].end
        : 0
      emitAdded(targetParts[tgtPos].text, tailOffset)
      tgtPos++
      continue
    }

    if (tgtPos >= targetParts.length) {
      // Rewrite exhausted: everything left in the original was dropped.
      emitRemoved(sourceParts[srcPos])
      srcPos++
      continue
    }

    const src = sourceParts[srcPos]
    const tgt = targetParts[tgtPos]
    const score = similarity(src.text, tgt.text)

    if (score > 0.4) {
      // Close enough to pair up; above 0.8 the pair counts as unchanged.
      emitPaired(score > 0.8 ? 'unchanged' : 'modified', src, tgt.text)
      srcPos++
      tgtPos++
      continue
    }

    // Low score: scan up to two sentences ahead in the rewrite for a
    // counterpart of the current original sentence.
    let hit = -1
    const tgtLimit = Math.min(tgtPos + 3, targetParts.length)
    for (let i = tgtPos + 1; i < tgtLimit; i++) {
      if (similarity(src.text, targetParts[i].text) > 0.6) {
        hit = i
        break
      }
    }

    if (hit !== -1) {
      // The rewrite sentences skipped over were inserted before `src`.
      for (let j = tgtPos; j < hit; j++) {
        emitAdded(targetParts[j].text, src.start)
      }
      tgtPos = hit
      continue
    }

    // Otherwise scan up to two sentences ahead in the original for a
    // counterpart of the current rewrite sentence.
    const srcLimit = Math.min(srcPos + 3, sourceParts.length)
    for (let i = srcPos + 1; i < srcLimit; i++) {
      if (similarity(sourceParts[i].text, tgt.text) > 0.6) {
        hit = i
        break
      }
    }

    if (hit !== -1) {
      // The original sentences skipped over were removed by the rewrite.
      for (let j = srcPos; j < hit; j++) {
        emitRemoved(sourceParts[j])
      }
      srcPos = hit
      continue
    }

    // No counterpart nearby on either side: treat the pair as a rewrite.
    emitPaired('modified', src, tgt.text)
    srcPos++
    tgtPos++
  }

  return segments
}
|
|
|
|
/**
 * Apply the accepted diff segments to the original text by exact offset.
 *
 * Only segments whose `idx` is in `acceptedChanges` are applied:
 *   - 'modified' replaces `[originalStart, originalEnd)` with `rewritten`
 *   - 'removed'  deletes `[originalStart, originalEnd)`
 *   - 'added'    inserts `rewritten` at offset `insertAfterIdx`
 * Segments lacking the offsets their type needs are ignored, as are
 * 'unchanged' segments.
 *
 * Edits are applied from the end of the text towards the start so earlier
 * offsets stay valid. When several edits share the same offset, the one that
 * appears later in `diffSegments` is applied first; this keeps multiple
 * insertions at one offset in diff order and keeps an insertion from being
 * overwritten by a replace/delete that starts at the same offset.
 *
 * @param {string} original - Original text
 * @param {Array} diffSegments - Diff segments carrying position information
 * @param {Set<number>} acceptedChanges - `idx` values of the accepted segments
 * @returns {string} The original text with the accepted changes applied
 */
export const applySelectedChanges = (original, diffSegments, acceptedChanges) => {
  // Every edit is normalized to "replace [start, end) with content";
  // an insertion is the empty range start === end.
  const edits = []

  for (const segment of diffSegments) {
    if (!acceptedChanges.has(segment.idx)) continue

    const hasRange =
      segment.originalStart !== undefined && segment.originalEnd !== undefined
    // Position of this edit among the accepted edits, used as tie-break.
    const order = edits.length

    if (segment.type === 'modified' && hasRange) {
      edits.push({
        start: segment.originalStart,
        end: segment.originalEnd,
        content: segment.rewritten,
        order
      })
    } else if (segment.type === 'removed' && hasRange) {
      edits.push({
        start: segment.originalStart,
        end: segment.originalEnd,
        content: '',
        order
      })
    } else if (segment.type === 'added' && segment.insertAfterIdx !== undefined) {
      edits.push({
        start: segment.insertAfterIdx,
        end: segment.insertAfterIdx,
        content: segment.rewritten,
        order
      })
    }
  }

  // Back to front by offset; for equal offsets, later diff entries first.
  edits.sort((a, b) => (b.start - a.start) || (b.order - a.order))

  let result = original
  for (const { start, end, content } of edits) {
    result = result.slice(0, start) + content + result.slice(end)
  }

  return result
}
|
|
|
|
/**
 * Summarize a list of diff segments by type.
 *
 * @param {Array} diffSegments - Diff segments; each is read for its `type`
 * @returns {{total: number, modified: number, added: number, removed: number, unchanged: number}}
 *   `total` is the number of segments; the other fields count the segments of
 *   that type. Segments of any other type are included in `total` only.
 */
export const getDiffStats = (diffSegments) => {
  // A Map keeps arbitrary `type` values from touching Object.prototype keys.
  const counts = new Map([
    ['modified', 0],
    ['added', 0],
    ['removed', 0],
    ['unchanged', 0]
  ])

  // Single pass instead of one filter per type.
  for (const segment of diffSegments) {
    const current = counts.get(segment.type)
    if (current !== undefined) {
      counts.set(segment.type, current + 1)
    }
  }

  return {
    total: diffSegments.length,
    modified: counts.get('modified'),
    added: counts.get('added'),
    removed: counts.get('removed'),
    unchanged: counts.get('unchanged')
  }
}
|