3 hónapja · 4e35e13ee5
--- a/examples_how/overall_derivation/derivation_main.md
+++ b/examples_how/overall_derivation/derivation_main.md
@@ -1,5 +1,5 @@
 
				 ---
			
 
				-model: anthropic/claude-sonnet-4.6
			
 
				+model: google/gemini-3-flash-preview
			
 
				 temperature: 0.3
			
 
				 ---
			
 
				 
			
--- a/examples_how/overall_derivation/overall_derivation_agent_run.py
+++ b/examples_how/overall_derivation/overall_derivation_agent_run.py
@@ -303,7 +303,7 @@ async def main(account_name, post_id):
 
				         trace_store=store,
			
 
				         llm_call=create_openrouter_llm_call(model=model_id),
			
 
				         skills_dir=skills_dir,
			
 
				-        experiences_path="./.cache/experiences_overall_derivation.md",
			
 
				+        # experiences_path="./.cache/experiences_overall_derivation.md",
			
 
				         debug=True,
			
 
				     )
			
 
				 
			
@@ -421,7 +421,7 @@ async def main(account_name, post_id):
 
				                     if isinstance(item, Trace):
			
 
				                         current_trace_id = item.trace_id
			
 
				                         if item.status == "running":
			
 
				-                            print(f"[Trace] 开始: {item.trace_id[:8]}...")
			
 
				+                            print(f"[Trace] 开始: {item.trace_id[:100]}...")
			
 
				                         elif item.status == "completed":
			
 
				                             print("\n[Trace] ✅ 完成")
			
 
				                             print(f"  - Total messages: {item.total_messages}")
			
@@ -444,7 +444,7 @@ async def main(account_name, post_id):
 
				                                     print("\n[Response] Agent 回复：")
			
 
				                                     print(text)
			
 
				                                 elif text:
			
 
				-                                    preview = text[:150] + "..." if len(text) > 150 else text
			
 
				+                                    preview = text[:500] + "..." if len(text) > 500 else text
			
 
				                                     print(f"[Assistant] {preview}")
			
 
				                                 if tool_calls:
			
 
				                                     for tc in tool_calls:
			
@@ -460,7 +460,7 @@ async def main(account_name, post_id):
 
				                                 print(f"[Tool Result] ✅ {tool_name}")
			
 
				                                 print(f"  result: {tool_result}")
			
 
				                             if item.description:
			
 
				-                                desc = item.description[:80] if len(item.description) > 80 else item.description
			
 
				+                                desc = item.description[:500] if len(item.description) > 500 else item.description
			
 
				                                 print(f"  {desc}...")
			
 
				 
			
 
				             except Exception as e:
			
--- a/examples_how/overall_derivation/tools/find_pattern.py
+++ b/examples_how/overall_derivation/tools/find_pattern.py
@@ -4,11 +4,18 @@
 
				 功能：读取账号的 pattern 库，合并去重后按条件概率筛选，返回 topN 条 pattern（含 pattern 名称、条件概率）。
			
 
				 """
			
 
				 
			
 
				-import importlib.util
			
 
				 import json
			
 
				+import sys
			
 
				 from pathlib import Path
			
 
				 from typing import Any, Optional
			
 
				 
			
 
				+# 保证直接运行或作为包加载时都能解析 utils / tools（IDE 可跳转）
			
 
				+_root = Path(__file__).resolve().parent.parent
			
 
				+if str(_root) not in sys.path:
			
 
				+    sys.path.insert(0, str(_root))
			
 
				+from utils.conditional_ratio_calc import calc_pattern_conditional_ratio
			
 
				+from tools.point_match import _load_match_data, match_derivation_to_post_points
			
 
				+
			
 
				 try:
			
 
				     from agent.tools import tool, ToolResult, ToolContext
			
 
				 except ImportError:
			
@@ -27,28 +34,8 @@ TOP_KEYS = [
 
				 ]
			
 
				 SUB_KEYS = ["two_x", "one_x", "zero_x"]
			
 
				 
			
 
				-# 加载 conditional_ratio_calc（与 find_tree_node 一致）
			
 
				-_utils_dir = Path(__file__).resolve().parent.parent / "utils"
			
 
				-_cond_spec = importlib.util.spec_from_file_location(
			
 
				-    "conditional_ratio_calc",
			
 
				-    _utils_dir / "conditional_ratio_calc.py",
			
 
				-)
			
 
				-_cond_mod = importlib.util.module_from_spec(_cond_spec)
			
 
				-_cond_spec.loader.exec_module(_cond_mod)
			
 
				-calc_pattern_conditional_ratio = _cond_mod.calc_pattern_conditional_ratio
			
 
				-
			
 
				 _BASE_INPUT = Path(__file__).resolve().parent.parent / "input"
			
 
				 
			
 
				-# 加载 point_match（用于检查 pattern 元素是否匹配帖子选题点）
			
 
				-_point_match_spec = importlib.util.spec_from_file_location(
			
 
				-    "point_match",
			
 
				-    Path(__file__).resolve().parent / "point_match.py",
			
 
				-)
			
 
				-_point_match_mod = importlib.util.module_from_spec(_point_match_spec)
			
 
				-_point_match_spec.loader.exec_module(_point_match_mod)
			
 
				-_match_derivation_to_post_points = _point_match_mod.match_derivation_to_post_points
			
 
				-_load_match_data = _point_match_mod._load_match_data
			
 
				-
			
 
				 
			
 
				 def _pattern_file(account_name: str) -> Path:
			
 
				     """pattern 库文件：../input/{account_name}/原始数据/pattern/processed_edge_data.json"""
			
@@ -259,7 +246,7 @@ async def find_pattern(
 
				                     if elem and elem not in seen_elements:
			
 
				                         all_elements.append(elem)
			
 
				                         seen_elements.add(elem)
			
 
				-            matched_results = await _match_derivation_to_post_points(all_elements, account_name, post_id)
			
 
				+            matched_results = await match_derivation_to_post_points(all_elements, account_name, post_id)
			
 
				             elem_match_map: dict[str, list] = {}
			
 
				             for m in matched_results:
			
 
				                 elem_match_map.setdefault(m["推导选题点"], []).append({
			
--- a/examples_how/overall_derivation/tools/find_tree_node.py
+++ b/examples_how/overall_derivation/tools/find_tree_node.py
@@ -6,11 +6,18 @@
 
				 2. 获取符合条件概率阈值的节点（按条件概率排序返回 topN）
			
 
				 """
			
 
				 
			
 
				-import importlib.util
			
 
				 import json
			
 
				+import sys
			
 
				 from pathlib import Path
			
 
				 from typing import Any, Optional
			
 
				 
			
 
				+# 保证直接运行或作为包加载时都能解析 utils / tools（IDE 可跳转）
			
 
				+_root = Path(__file__).resolve().parent.parent
			
 
				+if str(_root) not in sys.path:
			
 
				+    sys.path.insert(0, str(_root))
			
 
				+from utils.conditional_ratio_calc import calc_node_conditional_ratio
			
 
				+from tools.point_match import match_derivation_to_post_points
			
 
				+
			
 
				 try:
			
 
				     from agent.tools import tool, ToolResult, ToolContext
			
 
				 except ImportError:
			
@@ -19,28 +26,9 @@ except ImportError:
 
				     ToolResult = None  # 仅用 main() 测核心逻辑时可无 agent
			
 
				     ToolContext = None
			
 
				 
			
 
				-# 加载同目录层级的 conditional_ratio_calc（不依赖包结构）
			
 
				-_utils_dir = Path(__file__).resolve().parent.parent / "utils"
			
 
				-_cond_spec = importlib.util.spec_from_file_location(
			
 
				-    "conditional_ratio_calc",
			
 
				-    _utils_dir / "conditional_ratio_calc.py",
			
 
				-)
			
 
				-_cond_mod = importlib.util.module_from_spec(_cond_spec)
			
 
				-_cond_spec.loader.exec_module(_cond_mod)
			
 
				-calc_node_conditional_ratio = _cond_mod.calc_node_conditional_ratio
			
 
				-
			
 
				 # 相对本文件：tools -> overall_derivation，input 在 overall_derivation 下
			
 
				 _BASE_INPUT = Path(__file__).resolve().parent.parent / "input"
			
 
				 
			
 
				-# 加载 point_match（用于检查节点是否匹配帖子选题点）
			
 
				-_point_match_spec = importlib.util.spec_from_file_location(
			
 
				-    "point_match",
			
 
				-    Path(__file__).resolve().parent / "point_match.py",
			
 
				-)
			
 
				-_point_match_mod = importlib.util.module_from_spec(_point_match_spec)
			
 
				-_point_match_spec.loader.exec_module(_point_match_mod)
			
 
				-_match_derivation_to_post_points = _point_match_mod.match_derivation_to_post_points
			
 
				-
			
 
				 
			
 
				 def _tree_dir(account_name: str) -> Path:
			
 
				     """人设树目录：../input/{account_name}/原始数据/tree/"""
			
@@ -217,7 +205,7 @@ async def find_tree_constant_nodes(
 
				         # 批量匹配所有节点与帖子选题点
			
 
				         if items and post_id:
			
 
				             node_names = [x["节点名称"] for x in items]
			
 
				-            matched_results = await _match_derivation_to_post_points(node_names, account_name, post_id)
			
 
				+            matched_results = await match_derivation_to_post_points(node_names, account_name, post_id)
			
 
				             node_match_map: dict[str, list] = {}
			
 
				             for m in matched_results:
			
 
				                 node_match_map.setdefault(m["推导选题点"], []).append({
			
@@ -303,7 +291,7 @@ async def find_tree_nodes_by_conditional_ratio(
 
				         # 批量匹配所有节点与帖子选题点
			
 
				         if items and post_id:
			
 
				             node_names = [x["节点名称"] for x in items]
			
 
				-            matched_results = await _match_derivation_to_post_points(node_names, account_name, post_id)
			
 
				+            matched_results = await match_derivation_to_post_points(node_names, account_name, post_id)
			
 
				             node_match_map: dict[str, list] = {}
			
 
				             for m in matched_results:
			
 
				                 node_match_map.setdefault(m["推导选题点"], []).append({
			
--- a/examples_how/overall_derivation/tools/point_match.py
+++ b/examples_how/overall_derivation/tools/point_match.py
@@ -4,11 +4,17 @@
 
				 功能：读取帖子选题点列表，与推导选题点做相似度计算，返回 combined_score >= 阈值的匹配对。
			
 
				 """
			
 
				 
			
 
				-import importlib.util
			
 
				 import json
			
 
				+import sys
			
 
				 from pathlib import Path
			
 
				 from typing import Any, List, Optional
			
 
				 
			
 
				+# 保证直接运行或作为包加载时都能解析 utils（IDE 可跳转）
			
 
				+_root = Path(__file__).resolve().parent.parent
			
 
				+if str(_root) not in sys.path:
			
 
				+    sys.path.insert(0, str(_root))
			
 
				+from utils.similarity_calc import similarity_matrix
			
 
				+
			
 
				 try:
			
 
				     from agent.tools import tool, ToolResult, ToolContext
			
 
				 except ImportError:
			
@@ -17,16 +23,6 @@ except ImportError:
 
				     ToolResult = None
			
 
				     ToolContext = None
			
 
				 
			
 
				-# 加载 similarity_calc
			
 
				-_utils_dir = Path(__file__).resolve().parent.parent / "utils"
			
 
				-_sim_spec = importlib.util.spec_from_file_location(
			
 
				-    "similarity_calc",
			
 
				-    _utils_dir / "similarity_calc.py",
			
 
				-)
			
 
				-_sim_mod = importlib.util.module_from_spec(_sim_spec)
			
 
				-_sim_spec.loader.exec_module(_sim_mod)
			
 
				-similarity_matrix = _sim_mod.similarity_matrix
			
 
				-
			
 
				 _BASE_INPUT = Path(__file__).resolve().parent.parent / "input"
			
 
				 
			
 
				 # 默认匹配阈值
			
--- a/examples_how/overall_derivation/utils/conditional_ratio_calc.py
+++ b/examples_how/overall_derivation/utils/conditional_ratio_calc.py
@@ -115,7 +115,7 @@ def calc_node_conditional_ratio(
 
				     set_p = set(p_pids)
			
 
				     den = len(derived_post_ids & set_p)
			
 
				     if den == 0:
			
 
				-        return 1.0
			
 
				+        return 0.0
			
 
				     num = len(derived_post_ids & set_n)
			
 
				     return min(1.0, num / den)
			
 
				 
			
--- a/examples_how/overall_derivation/utils/similarity_calc.py
+++ b/examples_how/overall_derivation/utils/similarity_calc.py
@@ -97,7 +97,7 @@ DEFAULT_BATCH_PROMPT_TEMPLATE = """
 
				 # Embedding 相似度 API
			
 
				 EMBEDDING_SIMILARITY_URL = "http://61.48.133.26:8187/cartesian_similarity"
			
 
				 # LLM 模型
			
 
				-LLM_MODEL = "openai/gpt-4.1-mini"
			
 
				+LLM_MODEL = "google/gemini-3-flash-preview"
			
 
				 
			
 
				 
			
 
				 def _phrase_pairs(phrases_a: List[str], phrases_b: List[str]) -> List[Tuple[str, str]]:
			
@@ -165,9 +165,12 @@ def _extract_json_array(content: str) -> List[dict]:
 
				     return json.loads(content)
			
 
				 
			
 
				 
			
 
				-async def _llm_similarity(phrases_a: List[str], phrases_b: List[str]) -> List[List[float]]:
			
 
				+async def _llm_similarity(
			
 
				+    phrases_a: List[str], phrases_b: List[str], *, use_cache: bool = True
			
 
				+) -> List[List[float]]:
			
 
				     """
			
 
				-    用 LLM 对短语对打分，返回 M×N 矩阵。先查原子缓存，仅对未命中的短语对调用 API。
			
 
				+    用 LLM 对短语对打分，返回 M×N 矩阵。use_cache=True 时先查原子缓存，仅对未命中的短语对调用 API；
			
 
				+    use_cache=False 时不读不写缓存。
			
 
				     """
			
 
				     if not phrases_a or not phrases_b:
			
 
				         return []
			
@@ -177,16 +180,18 @@ async def _llm_similarity(phrases_a: List[str], phrases_b: List[str]) -> List[Li
 
				     missing_indices: List[Tuple[int, int]] = []
			
 
				     for i in range(M):
			
 
				         for j in range(N):
			
 
				-            score = _read_atomic_score("llm", phrases_a[i], phrases_b[j])
			
 
				-            if score is not None:
			
 
				-                matrix[i][j] = score
			
 
				-            else:
			
 
				-                missing_indices.append((i, j))
			
 
				+            if use_cache:
			
 
				+                score = _read_atomic_score("llm", phrases_a[i], phrases_b[j])
			
 
				+                if score is not None:
			
 
				+                    matrix[i][j] = score
			
 
				+                    continue
			
 
				+            missing_indices.append((i, j))
			
 
				 
			
 
				     total = M * N
			
 
				-    hit_count = total - len(missing_indices)
			
 
				-    if hit_count > 0:
			
 
				-        logger.info("[similarity_matrix] LLM 原子缓存命中 %d/%d", hit_count, total)
			
 
				+    if use_cache:
			
 
				+        hit_count = total - len(missing_indices)
			
 
				+        if hit_count > 0:
			
 
				+            logger.info("[similarity_matrix] LLM 原子缓存命中 %d/%d", hit_count, total)
			
 
				     if not missing_indices:
			
 
				         return matrix
			
 
				 
			
@@ -213,8 +218,9 @@ async def _llm_similarity(phrases_a: List[str], phrases_b: List[str]) -> List[Li
 
				         score = float(items[idx].get("score", 0.0))
			
 
				         score = max(0.0, min(1.0, score))
			
 
				         matrix[i][j] = score
			
 
				-        a, b = phrases_a[i], phrases_b[j]
			
 
				-        _write_atomic_score("llm", a, b, score)
			
 
				+        if use_cache:
			
 
				+            a, b = phrases_a[i], phrases_b[j]
			
 
				+            _write_atomic_score("llm", a, b, score)
			
 
				     return matrix
			
 
				 
			
 
				 
			
@@ -224,6 +230,7 @@ async def similarity_matrix(
 
				     *,
			
 
				     embedding_weight: float = 0.5,
			
 
				     llm_weight: float = 0.5,
			
 
				+    use_llm_cache: bool = True,
			
 
				 ) -> List[SimilarityItem]:
			
 
				     """
			
 
				     计算两组短语的相似度，返回对象列表（每条含 phrase_a, phrase_b, embedding_score, llm_score, combined_score）。
			
@@ -236,6 +243,7 @@ async def similarity_matrix(
 
				         phrases_b: 第二组短语列表（N 个）
			
 
				         embedding_weight: embedding 权重，默认 0.5
			
 
				         llm_weight: LLM 权重，默认 0.5
			
 
				+        use_llm_cache: 是否使用 LLM 相似度缓存，默认 True
			
 
				 
			
 
				     Returns:
			
 
				         对象列表，长度 M×N，顺序与短语对 (a0,b0),(a0,b1),...,(aM-1,bN-1) 一致。
			
@@ -256,7 +264,7 @@ async def similarity_matrix(
 
				 
			
 
				     async def _run_llm() -> List[List[float]]:
			
 
				         t0 = time.perf_counter()
			
 
				-        out = await _llm_similarity(phrases_a, phrases_b)
			
 
				+        out = await _llm_similarity(phrases_a, phrases_b, use_cache=use_llm_cache)
			
 
				         logger.info("[similarity_matrix] LLM 耗时: %.3fs", time.perf_counter() - t0)
			
 
				         return out
			
 
				 
			
@@ -328,22 +336,28 @@ def test_extract_json_array() -> None:
 
				 
			
 
				 
			
 
				 async def test_similarity_matrix() -> None:
			
 
				-    """集成测试：调用 embedding + LLM 得到相似度对象列表。"""
			
 
				-    phrases_a = ["犬", "猫咪"]
			
 
				-    phrases_b = ["狗", "手机"]
			
 
				-    items = await similarity_matrix(phrases_a, phrases_b)
			
 
				-    assert len(items) == 4
			
 
				-    for row in items:
			
 
				-        assert "phrase_a" in row and "phrase_b" in row
			
 
				-        assert "embedding_score" in row and "llm_score" in row and "combined_score" in row
			
 
				-        assert 0 <= row["combined_score"] <= 1, f"combined_score 应在 [0,1]，得到 {row['combined_score']}"
			
 
				-    # 语义上 "犬"-"狗" 应高于 "犬"-"手机"
			
 
				-    dog_dog = next(r for r in items if r["phrase_a"] == "犬" and r["phrase_b"] == "狗")
			
 
				-    dog_phone = next(r for r in items if r["phrase_a"] == "犬" and r["phrase_b"] == "手机")
			
 
				-    assert dog_dog["combined_score"] > dog_phone["combined_score"], "犬-狗 应高于 犬-手机"
			
 
				-    print("test_similarity_matrix: ok")
			
 
				-    for r in items:
			
 
				-        print(f"  {r['phrase_a']}-{r['phrase_b']}: emb={r['embedding_score']:.4f} llm={r['llm_score']:.4f} combined={r['combined_score']:.4f}")
			
 
				+    """集成测试：调用 embedding + LLM 得到相似度对象列表。use_llm_cache 可控制是否使用 LLM 缓存。"""
			
 
				+    # use_llm_cache = True
			
 
				+    use_llm_cache = False
			
 
				+    phrases_a = ["犬", "猫咪", "夸张"]
			
 
				+    phrases_b = ["狗", "手机", "夸张堆叠"]
			
 
				+    items = await similarity_matrix(phrases_a, phrases_b, use_llm_cache=use_llm_cache)
			
 
				+    for item in items:
			
 
				+        print(item)
			
 
				+
			
 
				+
			
 
				+    # assert len(items) == 4
			
 
				+    # for row in items:
			
 
				+    #     assert "phrase_a" in row and "phrase_b" in row
			
 
				+    #     assert "embedding_score" in row and "llm_score" in row and "combined_score" in row
			
 
				+    #     assert 0 <= row["combined_score"] <= 1, f"combined_score 应在 [0,1]，得到 {row['combined_score']}"
			
 
				+    # # 语义上 "犬"-"狗" 应高于 "犬"-"手机"
			
 
				+    # dog_dog = next(r for r in items if r["phrase_a"] == "犬" and r["phrase_b"] == "狗")
			
 
				+    # dog_phone = next(r for r in items if r["phrase_a"] == "犬" and r["phrase_b"] == "手机")
			
 
				+    # assert dog_dog["combined_score"] > dog_phone["combined_score"], "犬-狗 应高于 犬-手机"
			
 
				+    # print("test_similarity_matrix: ok")
			
 
				+    # for r in items:
			
 
				+    #     print(f"  {r['phrase_a']}-{r['phrase_b']}: emb={r['embedding_score']:.4f} llm={r['llm_score']:.4f} combined={r['combined_score']:.4f}")
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":