hai 2 meses · d53fcfd6a9
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -0,0 +1,8 @@
 
				+# 开发依赖 - Knowledge Agent 项目
			
 
				+# 用于开发时的类型检查和代码质量工具
			
 
				+
			
 
				+# 类型检查桩文件
			
 
				+types-requests==2.32.4.20250913
			
 
				+
			
 
				+# 安装方式：
			
 
				+# pip3 install -r requirements-dev.txt
			
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -0,0 +1,9 @@
 
				+# 可选依赖 - Knowledge Agent 项目
			
 
				+# 用于日志追踪和监控的可选功能
			
 
				+
			
 
				+# Logfire 日志追踪系统
			
 
				+logfire==4.14.2
			
 
				+logfire-api==4.14.2
			
 
				+
			
 
				+# 安装方式：
			
 
				+# pip3 install -r requirements-optional.txt
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,15 @@
 
				+# 核心依赖 - Knowledge Agent 项目
			
 
				+# 用于运行主要功能所需的必需依赖
			
 
				+
			
 
				+# OpenAI Agents 框架
			
 
				+openai-agents==0.4.2
			
 
				+
			
 
				+# OpenAI API 客户端
			
 
				+openai==2.6.1
			
 
				+
			
 
				+# 数据验证和设置管理
			
 
				+pydantic==2.12.3
			
 
				+pydantic-settings==2.11.0
			
 
				+
			
 
				+# HTTP 请求库
			
 
				+requests==2.32.5
			
--- a/sug_v6_1_2_115.py
+++ b/sug_v6_1_2_115.py
@@ -98,6 +98,10 @@ class RunContext(BaseModel):
 
				     # 最终结果
			
 
				     final_output: str | None = None
			
 
				 
			
 
				+    # 评估缓存：避免重复评估相同文本
			
 
				+    evaluation_cache: dict[str, tuple[float, str]] = Field(default_factory=dict)
			
 
				+    # key: 文本, value: (score, reason)
			
 
				+
			
 
				 
			
 
				 # ============================================================================
			
 
				 # Agent 定义
			
@@ -441,6 +445,13 @@ word_selection_instructions = """
 
				   * combined_query: 组合后的新query（只包含seed和word的原始文本）
			
 
				   * reasoning: 选择理由（说明为什么选这个词）
			
 
				 - overall_reasoning: 整体选择思路（说明这5个词的选择逻辑）
			
 
				+
			
 
				+## JSON输出规范
			
 
				+1. **格式要求**：必须输出标准的、完整的JSON格式
			
 
				+2. **字符限制**：不要在JSON中使用任何不可见的特殊字符或控制字符
			
 
				+3. **引号规范**：字符串中如需表达引用或强调，使用书名号《》或单书名号「」，不要使用英文引号或中文引号""
			
 
				+4. **编码规范**：所有文本使用UTF-8编码，不要包含二进制或转义序列
			
 
				+5. **完整性**：确保JSON的开始和结束括号完整匹配，所有字段都正确闭合
			
 
				 """.strip()
			
 
				 
			
 
				 word_selector = Agent[None](
			
@@ -492,6 +503,13 @@ def calculate_final_score(motivation_score: float, category_score: float) -> flo
 
				     return base_score
			
 
				 
			
 
				 
			
 
				+def clean_json_string(text: str) -> str:
			
 
				+    r"""清理JSON中的非法控制字符（保留 \t \n \r）"""
			
 
				+    import re
			
 
				+    # 移除除了 \t(09) \n(0A) \r(0D) 之外的所有 C0 控制字符（0x00-0x1F）；不处理 DEL(7F)
			
 
				+    return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', text)
			
 
				+
			
 
				+
			
 
				 def process_note_data(note: dict) -> Post:
			
 
				     """处理搜索接口返回的帖子数据"""
			
 
				     note_card = note.get("note_card", {})
			
@@ -555,7 +573,7 @@ def process_note_data(note: dict) -> Post:
 
				     )
			
 
				 
			
 
				 
			
 
				-async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
			
 
				+async def evaluate_with_o(text: str, o: str, cache: dict[str, tuple[float, str]] | None = None) -> tuple[float, str]:
			
 
				     """评估文本与原始问题o的相关度
			
 
				 
			
 
				     采用两阶段评估 + 代码计算规则：
			
@@ -563,9 +581,20 @@ async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
 
				     2. 品类维度评估（权重30%）
			
 
				     3. 应用规则A/B/C调整得分
			
 
				 
			
 
				+    Args:
			
 
				+        text: 待评估的文本
			
 
				+        o: 原始问题
			
 
				+        cache: 评估缓存（可选），以 text 为键（不含 o）；命中时直接返回缓存的 (分数, 理由)，仅应在同一原始问题 o 下复用
			
 
				+
			
 
				     Returns:
			
 
				         tuple[float, str]: (最终相关度分数, 综合评估理由)
			
 
				     """
			
 
				+    # 检查缓存
			
 
				+    if cache is not None and text in cache:
			
 
				+        cached_score, cached_reason = cache[text]
			
 
				+        print(f"  ⚡ 缓存命中: {text} -> {cached_score:.2f}")
			
 
				+        return cached_score, cached_reason
			
 
				+
			
 
				     # 准备输入
			
 
				     eval_input = f"""
			
 
				 <原始问题>
			
@@ -630,6 +659,10 @@ async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
 
				                 elif motivation_score <= 0.2:
			
 
				                     combined_reason += "（应用规则B：动机低分限制机制）"
			
 
				 
			
 
				+            # 存入缓存
			
 
				+            if cache is not None:
			
 
				+                cache[text] = (final_score, combined_reason)
			
 
				+
			
 
				             return final_score, combined_reason
			
 
				 
			
 
				         except Exception as e:
			
@@ -684,7 +717,7 @@ async def initialize(o: str, context: RunContext) -> tuple[list[Seg], list[Word]
 
				 
			
 
				     async def evaluate_seg(seg: Seg) -> Seg:
			
 
				         async with seg_semaphore:
			
 
				-            seg.score_with_o, seg.reason = await evaluate_with_o(seg.text, o)
			
 
				+            seg.score_with_o, seg.reason = await evaluate_with_o(seg.text, o, context.evaluation_cache)
			
 
				             return seg
			
 
				 
			
 
				     if seg_list:
			
@@ -813,7 +846,7 @@ async def run_round(
 
				 
			
 
				     async def evaluate_sug(sug: Sug) -> Sug:
			
 
				         async with semaphore:  # 限制并发数
			
 
				-            sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o)
			
 
				+            sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o, context.evaluation_cache)
			
 
				             return sug
			
 
				 
			
 
				     if all_sugs:
			
@@ -887,6 +920,7 @@ async def run_round(
 
				     # 4. 构建q_list_next
			
 
				     print(f"\n[步骤4] 构建q_list_next...")
			
 
				     q_list_next = []
			
 
				+    existing_q_texts = set()  # 用于去重
			
 
				     add_word_details = {}  # 保存每个seed对应的组合词列表
			
 
				     all_seed_combinations = []  # 保存本轮所有seed的组合词（用于后续构建seed_list_next）
			
 
				 
			
@@ -912,7 +946,7 @@ async def run_round(
 
				 
			
 
				         print(f"      候选词数量: {len(candidate_words)}")
			
 
				 
			
 
				-        # 调用Agent一次性选择并组合Top 5
			
 
				+        # 调用Agent一次性选择并组合Top 5（添加重试机制）
			
 
				         candidate_words_text = ', '.join([w.text for w in candidate_words])
			
 
				         selection_input = f"""
			
 
				 <原始问题>
			
@@ -929,15 +963,34 @@ async def run_round(
 
				 
			
 
				 请从候选词列表中选择最多5个最合适的词，分别与当前seed组合成新的query。
			
 
				 """
			
 
				-        result = await Runner.run(word_selector, selection_input)
			
 
				-        selection_result: WordSelectionTop5 = result.final_output
			
 
				+
			
 
				+        # 重试机制：最多调用 max_retries 次（含首次），两次尝试之间等待 1 秒；全部失败则跳过该 seed
			
 
				+        max_retries = 2
			
 
				+        selection_result = None
			
 
				+        for attempt in range(max_retries):
			
 
				+            try:
			
 
				+                result = await Runner.run(word_selector, selection_input)
			
 
				+                selection_result = result.final_output
			
 
				+                break  # 成功则跳出
			
 
				+            except Exception as e:
			
 
				+                error_msg = str(e)
			
 
				+                if attempt < max_retries - 1:
			
 
				+                    print(f"      ⚠️  选词失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:100]}")
			
 
				+                    await asyncio.sleep(1)
			
 
				+                else:
			
 
				+                    print(f"      ❌ 选词失败，跳过该seed: {error_msg[:100]}")
			
 
				+                    break
			
 
				+
			
 
				+        if selection_result is None:
			
 
				+            print(f"      跳过seed: {seed.text}")
			
 
				+            continue
			
 
				 
			
 
				         print(f"      Agent选择了 {len(selection_result.combinations)} 个组合")
			
 
				         print(f"      整体选择思路: {selection_result.overall_reasoning}")
			
 
				 
			
 
				         # 并发评估所有组合的相关度
			
 
				         async def evaluate_combination(comb: WordCombination) -> dict:
			
 
				-            score, reason = await evaluate_with_o(comb.combined_query, o)
			
 
				+            score, reason = await evaluate_with_o(comb.combined_query, o, context.evaluation_cache)
			
 
				             return {
			
 
				                 'word': comb.selected_word,
			
 
				                 'query': comb.combined_query,
			
@@ -951,8 +1004,13 @@ async def run_round(
 
				 
			
 
				         print(f"      评估完成，得到 {len(top_5)} 个组合")
			
 
				 
			
 
				-        # 将Top 5全部加入q_list_next
			
 
				+        # 将Top 5全部加入q_list_next（去重检查）
			
 
				         for comb in top_5:
			
 
				+            # 去重检查
			
 
				+            if comb['query'] in existing_q_texts:
			
 
				+                print(f"        ⊗ 跳过重复: {comb['query']}")
			
 
				+                continue
			
 
				+
			
 
				             print(f"        ✓ {comb['query']} (分数: {comb['score']:.2f})")
			
 
				 
			
 
				             new_q = Q(
			
@@ -962,6 +1020,7 @@ async def run_round(
 
				                 from_source="add"
			
 
				             )
			
 
				             q_list_next.append(new_q)
			
 
				+            existing_q_texts.add(comb['query'])  # 记录到去重集合
			
 
				 
			
 
				             # 记录已添加的词
			
 
				             seed.added_words.append(comb['word'])
			
@@ -980,10 +1039,15 @@ async def run_round(
 
				         # 保存到all_seed_combinations（用于构建seed_list_next）
			
 
				         all_seed_combinations.extend(top_5)
			
 
				 
			
 
				-    # 4.2 对于sug_list_list中，每个sug大于来自的query分数，加到q_list_next
			
 
				+    # 4.2 遍历 all_sugs：若 sug 的分数高于其来源 query（from_q）的分数，则加入 q_list_next（去重检查）
			
 
				     print(f"\n  4.2 将高分sug加入q_list_next...")
			
 
				     for sug in all_sugs:
			
 
				         if sug.from_q and sug.score_with_o > sug.from_q.score_with_o:
			
 
				+            # 去重检查
			
 
				+            if sug.text in existing_q_texts:
			
 
				+                print(f"    ⊗ 跳过重复: {sug.text}")
			
 
				+                continue
			
 
				+
			
 
				             new_q = Q(
			
 
				                 text=sug.text,
			
 
				                 score_with_o=sug.score_with_o,
			
@@ -991,6 +1055,7 @@ async def run_round(
 
				                 from_source="sug"
			
 
				             )
			
 
				             q_list_next.append(new_q)
			
 
				+            existing_q_texts.add(sug.text)  # 记录到去重集合
			
 
				             print(f"    ✓ {sug.text} (分数: {sug.score_with_o:.2f} > {sug.from_q.score_with_o:.2f})")
			
 
				 
			
 
				     # 5. 构建seed_list_next（关键修改：不保留上一轮的seed）