@@ -36,22 +36,37 @@ class WordLibrary(BaseModel):
     """Dynamic word-segmentation library"""
 
     words: set[str] = Field(default_factory=set)
     word_sources: dict[str, str] = Field(default_factory=dict)  # Origin of each word: word -> source (a note_id, or "initial")
+    core_words: set[str] = Field(default_factory=set)  # Core words (the first-pass initial segmentation)
 
-    def add_word(self, word: str, source: str = "unknown"):
+    def add_word(self, word: str, source: str = "unknown", is_core: bool = False):
         """Add a single word to the library"""
         if word and word.strip():
             word = word.strip()
             self.words.add(word)
             if word not in self.word_sources:
                 self.word_sources[word] = source
+            if is_core:
+                self.core_words.add(word)
 
-    def add_words(self, words: list[str], source: str = "unknown"):
+    def add_words(self, words: list[str], source: str = "unknown", is_core: bool = False):
         """Add multiple words at once"""
         for word in words:
-            self.add_word(word, source)
-
-    def get_unused_word(self, current_query: str) -> str | None:
-        """Return a word from the library that is absent from the current query"""
+            self.add_word(word, source, is_core)
+
+    def get_unused_word(self, current_query: str, prefer_core: bool = True) -> str | None:
+        """Return a word from the library that is absent from the current query.
+
+        Args:
+            current_query: the query being extended
+            prefer_core: whether to prefer core words (default True)
+        """
+        # Try core words first
+        if prefer_core and self.core_words:
+            for word in self.core_words:
+                if word not in current_query:
+                    return word
+
+        # Core words exhausted, or core preference disabled: scan the full library
         for word in self.words:
             if word not in current_query:
                 return word
@@ -61,7 +76,8 @@ class WordLibrary(BaseModel):
         """Serialize to a dict"""
         return {
             "words": list(self.words),
-            "word_sources": self.word_sources
+            "word_sources": self.word_sources,
+            "core_words": list(self.core_words)
         }
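
Taken together, the patched class prefers core words and only then falls back to the full library. A minimal usage sketch (assuming the patched WordLibrary above is importable; the example words and the "note_123" source are invented, and set iteration order is nondeterministic, though both lookups below are order-independent):

    lib = WordLibrary()
    # The initial segmentation becomes the core vocabulary.
    lib.add_words(["pet", "airline"], source="initial", is_core=True)
    # A word learned later from a note stays non-core.
    lib.add_word("crate", source="note_123")

    # Core words are tried first: "airline" is absent from the query,
    # so it is returned ahead of the non-core "crate".
    assert lib.get_unused_word("pet travel tips") == "airline"
    # Once every core word already appears in the query, the lookup
    # falls back to the full library.
    assert lib.get_unused_word("pet airline rules") == "crate"

    # Serialization now carries core_words alongside words and word_sources.
    print(lib.to_dict())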

@@ -458,9 +474,10 @@ async def initialize_word_library(original_query: str, context: RunContext) -> W
     segmentation: WordSegmentation = result.final_output
 
     word_lib = WordLibrary()
-    word_lib.add_words(segmentation.words, source="initial")
+    # Flag the initial segmentation as core words (is_core=True)
+    word_lib.add_words(segmentation.words, source="initial", is_core=True)
 
-    print(f"Initial word library: {list(word_lib.words)}")
+    print(f"Initial word library (core words): {list(word_lib.words)}")
     print(f"Segmentation rationale: {segmentation.reasoning}")
 
     # Save to context
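
The save step that follows presumably stores the serialized dict rather than the model itself, since main later reads run_context.word_library.get('core_words', []). A one-line sketch under that assumption:

    # Persist the plain-dict form; core_words now rides along via to_dict().
    context.word_library = word_lib.to_dict()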

@@ -713,9 +730,12 @@ async def process_suggestions(
         else:
             print(f"  ✗ Rewrite (synonyms): {rewrite_syn.rewritten_query} (score: {rewrite_syn_eval.relevance_score:.2f}, no improvement)")
 
-        # 4. Word-insertion strategy
-        unused_word = word_lib.get_unused_word(query)
+        # 4. Word-insertion strategy (prefer core words)
+        unused_word = word_lib.get_unused_word(query, prefer_core=True)
+        is_core_word = unused_word in word_lib.core_words if unused_word else False
+
         if unused_word and len(new_queries) < 5:
+            word_type = "core word" if is_core_word else "regular word"
             insertion_input = f"""
 <Current Query>
 {query}

@@ -733,10 +753,11 @@ async def process_suggestions(
             # Collect the word-insertion agent's input and output
             insertion_agent_call = {
                 "agent": "word-insertion position evaluation expert",
-                "action": "insert word",
+                "action": f"insert word ({word_type})",
                 "input": {
                     "query": query,
-                    "word_to_add": unused_word
+                    "word_to_add": unused_word,
+                    "is_core_word": is_core_word
                 },
                 "output": {
                     "new_query": insertion.new_query,

@@ -769,10 +790,10 @@ async def process_suggestions(
             )
 
             if insertion_eval.is_improved:
-                print(f"  ✓ Insert word: {insertion.new_query} (score: {insertion_eval.relevance_score:.2f})")
+                print(f"  ✓ Insert word ({word_type}): {insertion.new_query} [+{unused_word}] (score: {insertion_eval.relevance_score:.2f})")
                 new_queries.append(new_state)
             else:
-                print(f"  ✗ Insert word: {insertion.new_query} (score: {insertion_eval.relevance_score:.2f}, no improvement)")
+                print(f"  ✗ Insert word ({word_type}): {insertion.new_query} [+{unused_word}] (score: {insertion_eval.relevance_score:.2f}, no improvement)")
 
         # Record the full suggestion-branch result (hierarchical)
         add_step(context, f"Suggestion branch - {query}", "suggestion_branch", {

@@ -1333,6 +1354,7 @@ async def iterative_search_loop(
     print(f"Iterative search complete")
     print(f"  Total iterations: {iteration}")
     print(f"  Final satisfied note count: {len(all_satisfied_notes)}")
+    print(f"  Core word library: {list(word_lib.core_words)}")
     print(f"  Final word library size: {len(word_lib.words)}")
     print(f"{'='*60}")

@@ -1340,6 +1362,7 @@ async def iterative_search_loop(
     add_step(context, "Iterative search complete", "loop_complete", {
         "total_iterations": iteration,
         "total_satisfied_notes": len(all_satisfied_notes),
+        "core_words": list(word_lib.core_words),
         "final_word_library_size": len(word_lib.words),
         "final_word_library": list(word_lib.words)
     })

@@ -1405,6 +1428,7 @@ async def main(input_dir: str, max_iterations: int = 20, visualize: bool = False
     # Format the output
     output = f"Original question: {run_context.q}\n"
     output += f"Notes satisfying the request: {len(satisfied_notes)} found\n"
+    output += f"Core word library: {', '.join(run_context.word_library.get('core_words', []))}\n"
     output += f"Word library size: {len(run_context.word_library.get('words', []))} words\n"
     output += "\n" + "="*60 + "\n"