yangxiaohui 1 ヶ月 前
コミット
e811a4e156
2 ファイル変更、62 行追加、22 行削除
  1. + 39 - 15
      sug_v6_1_2_5.py
  2. + 23 - 7
      visualize_v2.js

+ 39 - 15
sug_v6_1_2_5.py

@@ -36,22 +36,37 @@ class WordLibrary(BaseModel):
     """动态分词库"""
     words: set[str] = Field(default_factory=set)
     word_sources: dict[str, str] = Field(default_factory=dict)  # 记录词的来源:word -> source(note_id或"initial")
+    core_words: set[str] = Field(default_factory=set)  # 核心词(第一层初始分词)
 
-    def add_word(self, word: str, source: str = "unknown"):
+    def add_word(self, word: str, source: str = "unknown", is_core: bool = False):
         """添加单词到分词库"""
         if word and word.strip():
             word = word.strip()
             self.words.add(word)
             if word not in self.word_sources:
                 self.word_sources[word] = source
+            if is_core:
+                self.core_words.add(word)
 
-    def add_words(self, words: list[str], source: str = "unknown"):
+    def add_words(self, words: list[str], source: str = "unknown", is_core: bool = False):
         """批量添加单词"""
         for word in words:
-            self.add_word(word, source)
-
-    def get_unused_word(self, current_query: str) -> str | None:
-        """获取一个当前query中没有的词"""
+            self.add_word(word, source, is_core)
+
+    def get_unused_word(self, current_query: str, prefer_core: bool = True) -> str | None:
+        """获取一个当前query中没有的词
+
+        Args:
+            current_query: 当前查询
+            prefer_core: 是否优先返回核心词(默认True)
+        """
+        # 优先从核心词中查找
+        if prefer_core and self.core_words:
+            for word in self.core_words:
+                if word not in current_query:
+                    return word
+
+        # 如果核心词都用完了,或者不优先使用核心词,从所有词中查找
         for word in self.words:
             if word not in current_query:
                 return word
@@ -61,7 +76,8 @@ class WordLibrary(BaseModel):
         """序列化为dict"""
         return {
             "words": list(self.words),
-            "word_sources": self.word_sources
+            "word_sources": self.word_sources,
+            "core_words": list(self.core_words)
         }
 
 
@@ -458,9 +474,10 @@ async def initialize_word_library(original_query: str, context: RunContext) -> W
     segmentation: WordSegmentation = result.final_output
 
     word_lib = WordLibrary()
-    word_lib.add_words(segmentation.words, source="initial")
+    # 初始分词标记为核心词(is_core=True)
+    word_lib.add_words(segmentation.words, source="initial", is_core=True)
 
-    print(f"初始分词库: {list(word_lib.words)}")
+    print(f"初始分词库(核心词): {list(word_lib.words)}")
     print(f"分词理由: {segmentation.reasoning}")
 
     # 保存到context
@@ -713,9 +730,12 @@ async def process_suggestions(
         else:
             print(f"      ✗ 改写(同义): {rewrite_syn.rewritten_query} (分数: {rewrite_syn_eval.relevance_score:.2f}, 未提升)")
 
-    # 4. 加词策略
-    unused_word = word_lib.get_unused_word(query)
+    # 4. 加词策略(优先使用核心词)
+    unused_word = word_lib.get_unused_word(query, prefer_core=True)
+    is_core_word = unused_word in word_lib.core_words if unused_word else False
+
     if unused_word and len(new_queries) < 5:
+        word_type = "核心词" if is_core_word else "普通词"
         insertion_input = f"""
 <当前Query>
 {query}
@@ -733,10 +753,11 @@ async def process_suggestions(
         # 收集加词Agent的输入输出
         insertion_agent_call = {
             "agent": "加词位置评估专家",
-            "action": "加词",
+            "action": f"加词({word_type})",
             "input": {
                 "query": query,
-                "word_to_add": unused_word
+                "word_to_add": unused_word,
+                "is_core_word": is_core_word
             },
             "output": {
                 "new_query": insertion.new_query,
@@ -769,10 +790,10 @@ async def process_suggestions(
         )
 
         if insertion_eval.is_improved:
-            print(f"      ✓ 加词: {insertion.new_query} (分数: {insertion_eval.relevance_score:.2f})")
+            print(f"      ✓ 加词({word_type}): {insertion.new_query} [+{unused_word}] (分数: {insertion_eval.relevance_score:.2f})")
             new_queries.append(new_state)
         else:
-            print(f"      ✗ 加词: {insertion.new_query} (分数: {insertion_eval.relevance_score:.2f}, 未提升)")
+            print(f"      ✗ 加词({word_type}): {insertion.new_query} [+{unused_word}] (分数: {insertion_eval.relevance_score:.2f}, 未提升)")
 
     # 记录完整的suggestion分支处理结果(层级化)
     add_step(context, f"Suggestion分支 - {query}", "suggestion_branch", {
@@ -1333,6 +1354,7 @@ async def iterative_search_loop(
     print(f"迭代搜索完成")
     print(f"  总迭代次数: {iteration}")
     print(f"  最终满足帖子数: {len(all_satisfied_notes)}")
+    print(f"  核心词库: {list(word_lib.core_words)}")
     print(f"  最终分词库大小: {len(word_lib.words)}")
     print(f"{'='*60}")
 
@@ -1340,6 +1362,7 @@ async def iterative_search_loop(
     add_step(context, "迭代搜索完成", "loop_complete", {
         "total_iterations": iteration,
         "total_satisfied_notes": len(all_satisfied_notes),
+        "core_words": list(word_lib.core_words),
         "final_word_library_size": len(word_lib.words),
         "final_word_library": list(word_lib.words)
     })
@@ -1405,6 +1428,7 @@ async def main(input_dir: str, max_iterations: int = 20, visualize: bool = False
     # 格式化输出
     output = f"原始问题:{run_context.q}\n"
     output += f"找到满足需求的帖子:{len(satisfied_notes)} 个\n"
+    output += f"核心词库:{', '.join(run_context.word_library.get('core_words', []))}\n"
     output += f"分词库大小:{len(run_context.word_library.get('words', []))} 个词\n"
     output += "\n" + "="*60 + "\n"
 

+ 23 - 7
visualize_v2.js

@@ -604,12 +604,20 @@ function TreeNode({ node, level, children, isCollapsed, onToggle, isSelected, on
             alignItems: 'center',
             gap: '8px',
           }}>
+            {/* 节点类型图标 */}
+            <span style={{
+              fontSize: '12px',
+              flexShrink: 0,
+            }}>
+              {node.type === 'note' ? '📝' : '🔍'}
+            </span>
+
             <div style={{
               fontWeight: level === 0 ? '600' : '400',
               maxWidth: '180px',
               flex: 1,
               minWidth: 0,
-              color: node.data.isSelected === false ? '#ef4444' : '#374151',
+              color: (node.type === 'note' ? node.data.matchLevel === 'unsatisfied' : node.data.isSelected === false) ? '#ef4444' : '#374151',
             }}
             title={node.data.title || node.id}
             >
@@ -681,8 +689,8 @@ function getLayoutedElements(nodes, edges, direction = 'LR') {
     const isHorizontal = direction === 'LR';
     dagreGraph.setGraph({
       rankdir: direction,
-      nodesep: 50,   // 垂直间距
-      ranksep: 200,  // 水平间距
+      nodesep: 120,   // 垂直间距 - 增加以避免节点重叠
+      ranksep: 280,  // 水平间距 - 增加以容纳更宽的节点
     });
 
     // 添加节点 - 根据节点类型设置不同的尺寸
@@ -1040,8 +1048,8 @@ function FlowContent() {
 
         dagreGraph.setGraph({
           rankdir: 'LR',
-          nodesep: 50,   // 垂直间距
-          ranksep: 200,  // 水平间距
+          nodesep: 120,   // 垂直间距 - 增加以避免节点重叠
+          ranksep: 280,  // 水平间距 - 增加以容纳更宽的节点
         });
 
         visibleNodes.forEach((node) => {
@@ -1363,7 +1371,7 @@ function FlowContent() {
                           const nodeScore = node.data.score ? parseFloat(node.data.score) : 0;
                           const nodeStrategy = node.data.strategy || '';
                           const strategyColor = getStrategyColor(nodeStrategy);
-                          const nodeIsSelected = node.data.isSelected !== false;
+                          const nodeIsSelected = node.type === 'note' ? node.data.matchLevel !== 'unsatisfied' : node.data.isSelected !== false;
 
                           return (
                           <React.Fragment key={node.id + '-' + index}>
@@ -1431,7 +1439,7 @@ function FlowContent() {
                               }}
                               title={\`\${node.data.title || node.id} (Score: \${nodeScore.toFixed(2)}, Strategy: \${nodeStrategy}, Selected: \${nodeIsSelected})\`}
                             >
-                              {/* 上半部分:竖线 + 文字 + 分数 */}
+                              {/* 上半部分:竖线 + 图标 + 文字 + 分数 */}
                               <div style={{
                                 display: 'flex',
                                 alignItems: 'center',
@@ -1446,6 +1454,14 @@ function FlowContent() {
                                   flexShrink: 0,
                                 }} />
 
+                                {/* 节点类型图标 */}
+                                <span style={{
+                                  fontSize: '11px',
+                                  flexShrink: 0,
+                                }}>
+                                  {node.type === 'note' ? '📝' : '🔍'}
+                                </span>
+
                                 {/* 节点文字 */}
                                 <span style={{
                                   flex: 1,