4 месяцев назад · d53fcfd6a9
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -0,0 +1,8 @@
 
															+# 开发依赖 - Knowledge Agent 项目
														
 
															+# 用于开发时的类型检查和代码质量工具
														
 
															+
														
 
															+# 类型检查桩文件
														
 
															+types-requests==2.32.4.20250913
														
 
															+
														
 
															+# 安装方式：
														
 
															+# pip3 install -r requirements-dev.txt
														
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -0,0 +1,9 @@
 
															+# 可选依赖 - Knowledge Agent 项目
														
 
															+# 用于日志追踪和监控的可选功能
														
 
															+
														
 
															+# Logfire 日志追踪系统
														
 
															+logfire==4.14.2
														
 
															+logfire-api==4.14.2
														
 
															+
														
 
															+# 安装方式：
														
 
															+# pip3 install -r requirements-optional.txt
														
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,15 @@
 
															+# 核心依赖 - Knowledge Agent 项目
														
 
															+# 用于运行主要功能所需的必需依赖
														
 
															+
														
 
															+# OpenAI Agents 框架
														
 
															+openai-agents==0.4.2
														
 
															+
														
 
															+# OpenAI API 客户端
														
 
															+openai==2.6.1
														
 
															+
														
 
															+# 数据验证和设置管理
														
 
															+pydantic==2.12.3
														
 
															+pydantic-settings==2.11.0
														
 
															+
														
 
															+# HTTP 请求库
														
 
															+requests==2.32.5
														
--- a/sug_v6_1_2_115.py
+++ b/sug_v6_1_2_115.py
@@ -98,6 +98,10 @@ class RunContext(BaseModel):
 
															     # 最终结果
														
 
															     final_output: str | None = None
														
 
															+    # 评估缓存：避免重复评估相同文本
														
 
															+    evaluation_cache: dict[str, tuple[float, str]] = Field(default_factory=dict)
														
 
															+    # key: 文本, value: (score, reason)
														
 
															+
														
 
															 # ============================================================================
														
 
															 # Agent 定义
														
@@ -441,6 +445,13 @@ word_selection_instructions = """
 
															   * combined_query: 组合后的新query（只包含seed和word的原始文本）
														
 
															   * reasoning: 选择理由（说明为什么选这个词）
														
 
															 - overall_reasoning: 整体选择思路（说明这5个词的选择逻辑）
														
 
															+
														
 
															+## JSON输出规范
														
 
															+1. **格式要求**：必须输出标准的、完整的JSON格式
														
 
															+2. **字符限制**：不要在JSON中使用任何不可见的特殊字符或控制字符
														
 
															+3. **引号规范**：字符串中如需表达引用或强调，使用书名号《》或单书名号「」，不要使用英文引号或中文引号""
														
 
															+4. **编码规范**：所有文本使用UTF-8编码，不要包含二进制或转义序列
														
 
															+5. **完整性**：确保JSON的开始和结束括号完整匹配，所有字段都正确闭合
														
 
															 """.strip()
														
 
															 word_selector = Agent[None](
														
@@ -492,6 +503,13 @@ def calculate_final_score(motivation_score: float, category_score: float) -> flo
 
															     return base_score
														
 
															+def clean_json_string(text: str) -> str:
														
 
															+    """清理JSON中的非法控制字符（保留 \t \n \r）"""
														
 
															+    import re
														
 
															+    # 移除除了 \t(09) \n(0A) \r(0D) 之外的所有控制字符
														
 
															+    return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', text)
														
 
															+
														
 
															+
														
 
															 def process_note_data(note: dict) -> Post:
														
 
															     """处理搜索接口返回的帖子数据"""
														
 
															     note_card = note.get("note_card", {})
														
@@ -555,7 +573,7 @@ def process_note_data(note: dict) -> Post:
 
															     )
														
 
															-async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
														
 
															+async def evaluate_with_o(text: str, o: str, cache: dict[str, tuple[float, str]] | None = None) -> tuple[float, str]:
														
 
															     """评估文本与原始问题o的相关度
														
 
															     采用两阶段评估 + 代码计算规则：
														
@@ -563,9 +581,20 @@ async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
 
															     2. 品类维度评估（权重30%）
														
 
															     3. 应用规则A/B/C调整得分
														
 
															+    Args:
														
 
															+        text: 待评估的文本
														
 
															+        o: 原始问题
														
 
															+        cache: 评估缓存（可选），用于避免重复评估
														
 
															+
														
 
															     Returns:
														
 
															         tuple[float, str]: (最终相关度分数, 综合评估理由)
														
 
															     """
														
 
															+    # 检查缓存
														
 
															+    if cache is not None and text in cache:
														
 
															+        cached_score, cached_reason = cache[text]
														
 
															+        print(f"  ⚡ 缓存命中: {text} -> {cached_score:.2f}")
														
 
															+        return cached_score, cached_reason
														
 
															+
														
 
															     # 准备输入
														
 
															     eval_input = f"""
														
 
															 <原始问题>
														
@@ -630,6 +659,10 @@ async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
 
															                 elif motivation_score <= 0.2:
														
 
															                     combined_reason += "（应用规则B：动机低分限制机制）"
														
 
															+            # 存入缓存
														
 
															+            if cache is not None:
														
 
															+                cache[text] = (final_score, combined_reason)
														
 
															+
														
 
															             return final_score, combined_reason
														
 
															         except Exception as e:
														
@@ -684,7 +717,7 @@ async def initialize(o: str, context: RunContext) -> tuple[list[Seg], list[Word]
 
															     async def evaluate_seg(seg: Seg) -> Seg:
														
 
															         async with seg_semaphore:
														
 
															-            seg.score_with_o, seg.reason = await evaluate_with_o(seg.text, o)
														
 
															+            seg.score_with_o, seg.reason = await evaluate_with_o(seg.text, o, context.evaluation_cache)
														
 
															             return seg
														
 
															     if seg_list:
														
@@ -813,7 +846,7 @@ async def run_round(
 
															     async def evaluate_sug(sug: Sug) -> Sug:
														
 
															         async with semaphore:  # 限制并发数
														
 
															-            sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o)
														
 
															+            sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o, context.evaluation_cache)
														
 
															             return sug
														
 
															     if all_sugs:
														
@@ -887,6 +920,7 @@ async def run_round(
 
															     # 4. 构建q_list_next
														
 
															     print(f"\n[步骤4] 构建q_list_next...")
														
 
															     q_list_next = []
														
 
															+    existing_q_texts = set()  # 用于去重
														
 
															     add_word_details = {}  # 保存每个seed对应的组合词列表
														
 
															     all_seed_combinations = []  # 保存本轮所有seed的组合词（用于后续构建seed_list_next）
														
@@ -912,7 +946,7 @@ async def run_round(
 
															         print(f"      候选词数量: {len(candidate_words)}")
														
 
															-        # 调用Agent一次性选择并组合Top 5
														
 
															+        # 调用Agent一次性选择并组合Top 5（添加重试机制）
														
 
															         candidate_words_text = ', '.join([w.text for w in candidate_words])
														
 
															         selection_input = f"""
														
 
															 <原始问题>
														
@@ -929,15 +963,34 @@ async def run_round(
 
															 请从候选词列表中选择最多5个最合适的词，分别与当前seed组合成新的query。
														
 
															 """
														
 
															-        result = await Runner.run(word_selector, selection_input)
														
 
															-        selection_result: WordSelectionTop5 = result.final_output
														
 
															+
														
 
															+        # 重试机制
														
 
															+        max_retries = 2
														
 
															+        selection_result = None
														
 
															+        for attempt in range(max_retries):
														
 
															+            try:
														
 
															+                result = await Runner.run(word_selector, selection_input)
														
 
															+                selection_result = result.final_output
														
 
															+                break  # 成功则跳出
														
 
															+            except Exception as e:
														
 
															+                error_msg = str(e)
														
 
															+                if attempt < max_retries - 1:
														
 
															+                    print(f"      ⚠️  选词失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:100]}")
														
 
															+                    await asyncio.sleep(1)
														
 
															+                else:
														
 
															+                    print(f"      ❌ 选词失败，跳过该seed: {error_msg[:100]}")
														
 
															+                    break
														
 
															+
														
 
															+        if selection_result is None:
														
 
															+            print(f"      跳过seed: {seed.text}")
														
 
															+            continue
														
 
															         print(f"      Agent选择了 {len(selection_result.combinations)} 个组合")
														
 
															         print(f"      整体选择思路: {selection_result.overall_reasoning}")
														
 
															         # 并发评估所有组合的相关度
														
 
															         async def evaluate_combination(comb: WordCombination) -> dict:
														
 
															-            score, reason = await evaluate_with_o(comb.combined_query, o)
														
 
															+            score, reason = await evaluate_with_o(comb.combined_query, o, context.evaluation_cache)
														
 
															             return {
														
 
															                 'word': comb.selected_word,
														
 
															                 'query': comb.combined_query,
														
@@ -951,8 +1004,13 @@ async def run_round(
 
															         print(f"      评估完成，得到 {len(top_5)} 个组合")
														
 
															-        # 将Top 5全部加入q_list_next
														
 
															+        # 将Top 5全部加入q_list_next（去重检查）
														
 
															         for comb in top_5:
														
 
															+            # 去重检查
														
 
															+            if comb['query'] in existing_q_texts:
														
 
															+                print(f"        ⊗ 跳过重复: {comb['query']}")
														
 
															+                continue
														
 
															+
														
 
															             print(f"        ✓ {comb['query']} (分数: {comb['score']:.2f})")
														
 
															             new_q = Q(
														
@@ -962,6 +1020,7 @@ async def run_round(
 
															                 from_source="add"
														
 
															             )
														
 
															             q_list_next.append(new_q)
														
 
															+            existing_q_texts.add(comb['query'])  # 记录到去重集合
														
 
															             # 记录已添加的词
														
 
															             seed.added_words.append(comb['word'])
														
@@ -980,10 +1039,15 @@ async def run_round(
 
															         # 保存到all_seed_combinations（用于构建seed_list_next）
														
 
															         all_seed_combinations.extend(top_5)
														
 
															-    # 4.2 对于sug_list_list中，每个sug大于来自的query分数，加到q_list_next
														
 
															+    # 4.2 对于sug_list_list中，每个sug大于来自的query分数，加到q_list_next（去重检查）
														
 
															     print(f"\n  4.2 将高分sug加入q_list_next...")
														
 
															     for sug in all_sugs:
														
 
															         if sug.from_q and sug.score_with_o > sug.from_q.score_with_o:
														
 
															+            # 去重检查
														
 
															+            if sug.text in existing_q_texts:
														
 
															+                print(f"    ⊗ 跳过重复: {sug.text}")
														
 
															+                continue
														
 
															+
														
 
															             new_q = Q(
														
 
															                 text=sug.text,
														
 
															                 score_with_o=sug.score_with_o,
														
@@ -991,6 +1055,7 @@ async def run_round(
 
															                 from_source="sug"
														
 
															             )
														
 
															             q_list_next.append(new_q)
														
 
															+            existing_q_texts.add(sug.text)  # 记录到去重集合
														
 
															             print(f"    ✓ {sug.text} (分数: {sug.score_with_o:.2f} > {sug.from_q.score_with_o:.2f})")
														
 
															     # 5. 构建seed_list_next（关键修改：不保留上一轮的seed）