From 0188240d6260a79c00761b7127815903213ebb88 Mon Sep 17 00:00:00 2001 From: Huarch Date: Thu, 4 Jun 2026 15:26:23 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E5=BB=BA=E4=BC=9A=E8=AF=9D=E8=AE=B0?= =?UTF-8?q?=E5=BD=95=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .opencode/tools/memory_manager.ts | 15 ++- src/learning/orchestrator.ts | 45 +++++---- src/memory/store.ts | 123 ++++++++++++++++++++++++- src/routes/chat.ts | 36 +++++--- src/routes/chatSession.ts | 54 ++++++++++- src/sessions/transcriptStore.ts | 13 +++ tests/memory/store.test.ts | 76 +++++++++++++++ tests/routes/chatSession.test.ts | 22 +++++ tests/sessions/transcriptStore.test.ts | 33 +++++++ 9 files changed, 375 insertions(+), 42 deletions(-) create mode 100644 tests/memory/store.test.ts diff --git a/.opencode/tools/memory_manager.ts b/.opencode/tools/memory_manager.ts index 22f7795..5512e09 100644 --- a/.opencode/tools/memory_manager.ts +++ b/.opencode/tools/memory_manager.ts @@ -11,7 +11,7 @@ const initializePromise = Promise.all([ export default tool({ description: - "管理长期有效的用户偏好或项目事实。支持 add/list/replace/remove。禁止写入 token、password、secret、system prompt 或一次性上下文。scope 仅允许 'user' 或 'workspace'。", + "管理长期有效的用户偏好或项目事实。支持 add/list/replace/remove。新增记忆前必须先查看同 scope 的现有记忆,避免写入近似重复项;如果已有相近内容,应优先 replace/remove 而不是重复 add。禁止写入 token、password、secret、system prompt 或一次性上下文。scope 仅允许 'user' 或 'workspace'。", args: { action: tool.schema .enum(["add", "list", "replace", "remove"]) @@ -95,9 +95,18 @@ export default tool({ return JSON.stringify({ ok: true, kind: "memory", - decision: result.changed ? "accepted" : "deduped", - detail: result.changed ? "memory stored" : "memory already existed", + decision: + result.changed + ? "accepted" + : result.detail === "memory already existed" + ? "deduped" + : "rejected", + detail: + result.detail === "similar memory already exists" + ? "similar memory already exists; review listed memories before storing a rewritten variant" + : result.detail, entry: result.entry, + existing_entry: result.similar, target: scope, }); } diff --git a/src/learning/orchestrator.ts b/src/learning/orchestrator.ts index 201b302..49740fa 100644 --- a/src/learning/orchestrator.ts +++ b/src/learning/orchestrator.ts @@ -339,28 +339,33 @@ export class LearningOrchestrator { source: "review" as const, traceId: input.requestContext.traceId, }; - const result = - proposal.action === "add" - ? await this.memoryStore.upsert(proposal.scope as MemoryScope, scopeKey, draft) - : proposal.action === "replace" - ? await this.memoryStore.replace( - proposal.scope as MemoryScope, - scopeKey, - proposal.target_id ?? "", - draft, - ) - : await this.memoryStore.remove( - proposal.scope as MemoryScope, - scopeKey, - proposal.target_id ?? "", - ); - const accepted = - "entry" in result ? Boolean(result.entry) : Boolean(result.changed); + let accepted = false; + let detail = "memory rejected"; + if (proposal.action === "add") { + const result = await this.memoryStore.upsert(proposal.scope as MemoryScope, scopeKey, draft); + accepted = Boolean(result.entry); + detail = result.detail; + } else if (proposal.action === "replace") { + const result = await this.memoryStore.replace( + proposal.scope as MemoryScope, + scopeKey, + proposal.target_id ?? "", + draft, + ); + accepted = Boolean(result.changed); + detail = result.detail; + } else { + const result = await this.memoryStore.remove( + proposal.scope as MemoryScope, + scopeKey, + proposal.target_id ?? "", + ); + accepted = Boolean(result.changed); + detail = result.detail; + } await writeLearningAuditLog({ action: `memory-${proposal.action}`, - detail: sanitizeAuditDetail( - "detail" in result ? result.detail : result.changed ? "memory stored" : "memory deduped", - ), + detail: sanitizeAuditDetail(detail), outcome: accepted ? "accepted" : "rejected", projectId: input.requestContext.projectId, proposal: sanitizeMemoryProposalForAudit(proposal), diff --git a/src/memory/store.ts b/src/memory/store.ts index ec66cb8..0c2d6b7 100644 --- a/src/memory/store.ts +++ b/src/memory/store.ts @@ -57,13 +57,33 @@ export class MemoryStore { return this.serializeWrite(async () => { const content = normalizeMemoryContent(draft.content); if (!content) { - return { changed: false, entry: null as MemoryEntry | null }; + return { + changed: false, + detail: "content rejected by persistence policy", + entry: null as MemoryEntry | null, + similar: null as MemoryEntry | null, + }; } const entries = await this.readEntries(scope, key); const existing = entries.find((entry) => entry.content === content); if (existing) { - return { changed: false, entry: existing }; + return { + changed: false, + detail: "memory already existed", + entry: existing, + similar: existing, + }; + } + + const similar = findSimilarMemory(entries, content); + if (similar) { + return { + changed: false, + detail: "similar memory already exists", + entry: similar, + similar, + }; } const entry: MemoryEntry = { @@ -80,7 +100,12 @@ export class MemoryStore { rootDir: this.baseDir, }, ); - return { changed: true, entry }; + return { + changed: true, + detail: "memory stored", + entry, + similar: null as MemoryEntry | null, + }; }); } @@ -105,6 +130,13 @@ export class MemoryStore { if (duplicate) { return { changed: false, detail: "replacement would duplicate an existing memory" }; } + const similar = findSimilarMemory(entries, content, entries[index]?.id); + if (similar) { + return { + changed: false, + detail: "replacement would overlap with a similar existing memory", + }; + } entries[index] = { content, id: entries[index]?.id ?? toStableId(scope, key, content.toLowerCase()), @@ -214,6 +246,91 @@ const normalizeMemoryContent = (content: string) => { return normalized; }; +const findSimilarMemory = ( + entries: MemoryEntry[], + content: string, + excludeId?: string, +) => + entries.find( + (entry) => entry.id !== excludeId && areSimilarMemoryContents(entry.content, content), + ) ?? null; + +const areSimilarMemoryContents = (left: string, right: string) => { + const normalizedLeft = normalizeComparableMemory(left); + const normalizedRight = normalizeComparableMemory(right); + if (!normalizedLeft || !normalizedRight) { + return false; + } + if (normalizedLeft === normalizedRight) { + return true; + } + + const [shorter, longer] = + normalizedLeft.length <= normalizedRight.length + ? [normalizedLeft, normalizedRight] + : [normalizedRight, normalizedLeft]; + if (shorter.length >= 12 && longer.includes(shorter)) { + return true; + } + if (shorter.length < 8) { + return false; + } + if ( + longestCommonSubsequenceLength(normalizedLeft, normalizedRight) / shorter.length >= 0.5 + ) { + return true; + } + + return ( + diceCoefficient(buildCharacterBigrams(normalizedLeft), buildCharacterBigrams(normalizedRight)) >= + 0.72 + ); +}; + +const normalizeComparableMemory = (content: string) => + normalizeMemoryContent(content) + .toLowerCase() + .replace(/[^\p{L}\p{N}]+/gu, ""); + +const buildCharacterBigrams = (content: string) => { + const grams = new Set(); + for (let index = 0; index < content.length - 1; index += 1) { + grams.add(content.slice(index, index + 2)); + } + return grams; +}; + +const diceCoefficient = (left: Set, right: Set) => { + if (left.size === 0 || right.size === 0) { + return 0; + } + let overlap = 0; + for (const item of left) { + if (right.has(item)) { + overlap += 1; + } + } + return (2 * overlap) / (left.size + right.size); +}; + +const longestCommonSubsequenceLength = (left: string, right: string) => { + const previous = new Array(right.length + 1).fill(0); + const current = new Array(right.length + 1).fill(0); + for (let leftIndex = 1; leftIndex <= left.length; leftIndex += 1) { + for (let rightIndex = 1; rightIndex <= right.length; rightIndex += 1) { + current[rightIndex] = + left[leftIndex - 1] === right[rightIndex - 1] + ? previous[rightIndex - 1] + 1 + : Math.max(previous[rightIndex], current[rightIndex - 1]); + } + for (let rightIndex = 0; rightIndex <= right.length; rightIndex += 1) { + previous[rightIndex] = current[rightIndex]; + current[rightIndex] = 0; + } + } + return previous[right.length]; +}; + const parseMemoryMarkdown = (content: string): MemoryEntry[] => content .split("\n") diff --git a/src/routes/chat.ts b/src/routes/chat.ts index 2a8102e..08164ae 100644 --- a/src/routes/chat.ts +++ b/src/routes/chat.ts @@ -15,6 +15,7 @@ import { type SessionRecord } from "../sessions/metadataStore.js"; import { toActorKey, toProjectKey } from "../utils/fileStore.js"; import { buildPromptWithLearningContext, + extractLatestFrontendTurn, generateSessionTitle, shouldGenerateSessionTitle, } from "./chatSession.js"; @@ -205,6 +206,26 @@ export const buildChatRouter = ( messages: parsed.data.messages, branchGroups: parsed.data.branch_groups, }); + const latestTurn = extractLatestFrontendTurn(parsed.data.messages); + if (latestTurn) { + void learningOrchestrator.onTurnCompleted({ + ...latestTurn, + requestContext: { + actorKey, + clientSessionId: nextRecord.sessionId, + projectId, + projectKey, + traceId: req.header("x-trace-id") ?? `save-${nextRecord.sessionId}`, + userId, + }, + sessionId: nextRecord.sessionId, + }).catch((error) => { + logger.warn( + { err: error, sessionId: nextRecord.sessionId }, + "post-save learning failed", + ); + }); + } res.json({ id: nextRecord.sessionId, title: nextRecord.title ?? "新对话", @@ -635,21 +656,6 @@ export const buildChatRouter = ( ); } } - if (assistantText) { - void learningOrchestrator.onTurnCompleted({ - assistantMessage: assistantText, - model: parsed.data.model, - requestContext, - sessionId: clientSessionId, - toolCallCount: streamResult.toolCallCount, - userMessage: parsed.data.message, - }).catch((error) => { - logger.warn( - { err: error, sessionId: clientSessionId }, - "post-turn learning failed", - ); - }); - } } } finally { sessionBridge.finalizeRequest(clientSessionId); diff --git a/src/routes/chatSession.ts b/src/routes/chatSession.ts index 1f8e550..12eecb8 100644 --- a/src/routes/chatSession.ts +++ b/src/routes/chatSession.ts @@ -254,6 +254,46 @@ const isObjectRecord = (value: unknown): value is Record => const isSyntheticAssistantError = (content: string) => /^⚠️\s*\*\*(请求已中断|错误[::]?)/.test(content); +export const extractLatestFrontendTurn = (messages: unknown[] | undefined) => { + if (!Array.isArray(messages) || messages.length === 0) { + return null; + } + + for (let index = messages.length - 1; index >= 0; index -= 1) { + const assistant = messages[index]; + if (!isObjectRecord(assistant) || assistant.role !== "assistant") { + continue; + } + const assistantMessage = + typeof assistant.content === "string" + ? assistant.content.replace(/\s+/g, " ").trim() + : ""; + if (!assistantMessage || isSyntheticAssistantError(assistantMessage)) { + continue; + } + + const user = messages + .slice(0, index) + .reverse() + .find((message) => isObjectRecord(message) && message.role === "user"); + if (!isObjectRecord(user) || typeof user.content !== "string") { + continue; + } + const userMessage = user.content.replace(/\s+/g, " ").trim(); + if (!userMessage) { + continue; + } + + return { + assistantMessage, + toolCallCount: estimateFrontendToolCallCount(assistant), + userMessage, + }; + } + + return null; +}; + const buildRestoredConversationFromMessages = (messages: unknown[] | undefined) => { if (!Array.isArray(messages) || messages.length === 0) { return ""; @@ -299,4 +339,16 @@ const buildRestoredConversationFromMessages = (messages: unknown[] | undefined) "以下为当前前端对话线程中最近的历史对话,请延续其中已确认的目标、约束、结论与引用结果。", trimmedConversation, ].join("\n"); -}; \ No newline at end of file +}; + +const estimateFrontendToolCallCount = (assistant: Record) => { + const progress = Array.isArray(assistant.progress) ? assistant.progress : []; + const artifacts = Array.isArray(assistant.artifacts) ? assistant.artifacts : []; + const toolProgressCount = progress.filter( + (item) => + isObjectRecord(item) && + (item.phase === "tool" || + (typeof item.id === "string" && item.id.startsWith("tool-"))), + ).length; + return Math.max(toolProgressCount, artifacts.length); +}; diff --git a/src/sessions/transcriptStore.ts b/src/sessions/transcriptStore.ts index d793dae..44449c6 100644 --- a/src/sessions/transcriptStore.ts +++ b/src/sessions/transcriptStore.ts @@ -77,6 +77,19 @@ export class SessionTranscriptStore { } const timestamp = new Date().toISOString(); + const lastTurn = transcript.turns.at(-1); + if ( + lastTurn?.userMessage === userMessage && + lastTurn.assistantMessage === assistantMessage + ) { + lastTurn.toolCallCount = Math.max(lastTurn.toolCallCount, turn.toolCallCount); + transcript.clientSessionId = context.clientSessionId ?? transcript.clientSessionId; + transcript.sessionId = context.sessionId; + transcript.updatedAt = timestamp; + await atomicWriteJson(key, transcript); + return transcript; + } + const record: SessionTurnRecord = { id: toStableId(context.sessionId, timestamp, userMessage, assistantMessage), assistantMessage, diff --git a/tests/memory/store.test.ts b/tests/memory/store.test.ts new file mode 100644 index 0000000..ff084c4 --- /dev/null +++ b/tests/memory/store.test.ts @@ -0,0 +1,76 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { MemoryStore } from "../../src/memory/store.js"; + +describe("MemoryStore", () => { + let tempDir: string; + let backupDir: string; + let store: MemoryStore; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), "tjwater-memory-")); + backupDir = await mkdtemp(join(tmpdir(), "tjwater-memory-backup-")); + store = new MemoryStore(tempDir, backupDir); + await store.initialize(); + }); + + afterEach(async () => { + await rm(tempDir, { force: true, recursive: true }); + await rm(backupDir, { force: true, recursive: true }); + }); + + it("dedupes exact duplicate memories", async () => { + const first = await store.upsert("workspace", "project-1", { + content: "DMA-2 nightly leakage analysis should compare against adjacent zones first.", + source: "tool", + }); + const second = await store.upsert("workspace", "project-1", { + content: "DMA-2 nightly leakage analysis should compare against adjacent zones first.", + source: "tool", + }); + + expect(first.changed).toBe(true); + expect(second.changed).toBe(false); + expect(second.detail).toBe("memory already existed"); + }); + + it("rejects rewritten memories that are too similar to an existing one", async () => { + await store.upsert("workspace", "project-1", { + content: "保存记忆前先查看当前 workspace memory,避免重复写入相同约束。", + source: "tool", + }); + + const result = await store.upsert("workspace", "project-1", { + content: "写入前先看一遍当前 workspace 记忆,避免把同样的约束重复保存进去。", + source: "tool", + }); + + expect(result.changed).toBe(false); + expect(result.detail).toBe("similar memory already exists"); + expect(result.entry?.content).toBe("保存记忆前先查看当前 workspace memory,避免重复写入相同约束。"); + }); + + it("rejects replace when the new content overlaps a similar existing memory", async () => { + const first = await store.upsert("user", "actor-1", { + content: "回答时默认使用中文,并保持结论先行。", + source: "tool", + }); + const second = await store.upsert("user", "actor-1", { + content: "回答要包含必要的文件路径引用。", + source: "tool", + }); + + const result = await store.replace("user", "actor-1", second.entry?.id ?? "", { + content: "默认使用中文回答,结论放在最前面。", + source: "tool", + }); + + expect(first.changed).toBe(true); + expect(second.changed).toBe(true); + expect(result.changed).toBe(false); + expect(result.detail).toBe("replacement would overlap with a similar existing memory"); + }); +}); diff --git a/tests/routes/chatSession.test.ts b/tests/routes/chatSession.test.ts index febf4f9..fecdf1c 100644 --- a/tests/routes/chatSession.test.ts +++ b/tests/routes/chatSession.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "bun:test"; import { buildPromptWithLearningContext, + extractLatestFrontendTurn, generateSessionTitle, shouldGenerateSessionTitle, } from "../../src/routes/chatSession.js"; @@ -161,3 +162,24 @@ describe("buildPromptWithLearningContext", () => { expect(prompt).toBe("基于刚才结果继续分析"); }); }); + +describe("extractLatestFrontendTurn", () => { + it("extracts the latest valid frontend user and assistant turn", () => { + const turn = extractLatestFrontendTurn([ + { role: "user", content: "检查 DMA-2 漏损" }, + { + role: "assistant", + content: "DMA-2 夜间最小流量持续抬升。", + progress: [{ id: "tool-dma", phase: "tool" }], + }, + { role: "user", content: "继续分析相邻分区" }, + { role: "assistant", content: "⚠️ **请求已中断**", isError: true }, + ]); + + expect(turn).toEqual({ + assistantMessage: "DMA-2 夜间最小流量持续抬升。", + toolCallCount: 1, + userMessage: "检查 DMA-2 漏损", + }); + }); +}); diff --git a/tests/sessions/transcriptStore.test.ts b/tests/sessions/transcriptStore.test.ts index 5b54893..3e06a83 100644 --- a/tests/sessions/transcriptStore.test.ts +++ b/tests/sessions/transcriptStore.test.ts @@ -135,4 +135,37 @@ describe("SessionTranscriptStore", () => { expect(forkRecentTurns).toHaveLength(1); expect(forkRecentTurns[0]?.assistantMessage).toBe("第一轮回复"); }); + + it("does not duplicate the latest turn when the frontend state is saved again", async () => { + await store.appendTurn( + { + actorKey: "actor-3", + clientSessionId: "thread-3", + projectKey: "project-3", + sessionId: "thread-3", + }, + { + assistantMessage: "已完成压力波动分析。", + toolCallCount: 1, + userMessage: "分析压力波动。", + }, + ); + + const transcript = await store.appendTurn( + { + actorKey: "actor-3", + clientSessionId: "thread-3", + projectKey: "project-3", + sessionId: "thread-3", + }, + { + assistantMessage: "已完成压力波动分析。", + toolCallCount: 2, + userMessage: "分析压力波动。", + }, + ); + + expect(transcript.turns).toHaveLength(1); + expect(transcript.turns[0]?.toolCallCount).toBe(2); + }); });