liuzhiheng 1 month ago
Parent
Commit
c187619960

+ 1 - 1
examples_how/overall_derivation/derivation_main.md

@@ -487,7 +487,7 @@ agent(agent_type="derivation_search", task="执行搜索任务,account_name=xx
 #### Dynamic adjustment of internal derivation method thresholds
 The `conditional_ratio_threshold` (conditional probability threshold) and `top_n` (maximum number of returned records) of internal derivation methods 2 and 3 are tuned dynamically by the agent:
 - `top_n` starts at a minimum of 500 and can be stepped up along 500→1000→2000; for method 2 (account pattern reuse) the minimum `top_n` is 1000
-- Each round may gradually lower the conditional probability threshold, or raise the maximum number of returned records, to recall as much data as possible and derive more matching topic points
+- Each round may gradually lower the conditional probability threshold (**but never below a floor of 0.2**), or raise the maximum number of returned records, to recall as much data as possible and derive more matching topic points (see the sketch just below this hunk)
 
 ---
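The relaxation rule above can be read as a simple per-round schedule. A minimal sketch, assuming a multiplicative decay and the 500→1000→2000 ladder; the starting threshold, the `decay` factor, and the coupling between the two knobs are illustrative assumptions, not code from this commit:

```python
# Hypothetical sketch of the per-round relaxation described in the doc above.
THRESHOLD_FLOOR = 0.2             # hard floor introduced by this commit
TOP_N_STEPS = [500, 1000, 2000]   # method 3 ladder; method 2 starts at 1000

def relax(threshold: float, top_n: int, decay: float = 0.5) -> tuple[float, int]:
    """One round: lower the threshold (never below the floor), then
    widen top_n to the next step once the threshold has bottomed out."""
    threshold = max(threshold * decay, THRESHOLD_FLOOR)
    if threshold == THRESHOLD_FLOOR and top_n in TOP_N_STEPS[:-1]:
        top_n = TOP_N_STEPS[TOP_N_STEPS.index(top_n) + 1]
    return threshold, top_n
```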
 

+ 10 - 3
examples_how/overall_derivation/generate_visualize_data.py

@@ -500,6 +500,13 @@ def main(account_name, post_id, log_id):
 
 if __name__ == "__main__":
     account_name="家有大志"
-    post_id = "68fb6a5c000000000302e5de"
-    log_id="20260317112639"
-    main(account_name, post_id, log_id)
+
+    items = [
+        {"post_id":"68fb6a5c000000000302e5de","log_id":"20260317214307"},
+        {"post_id":"69185d49000000000d00f94e","log_id":"20260317214841"},
+        {"post_id":"6921937a000000001b0278d1","log_id":"20260317215616"}
+    ]
+    for item in items:
+        post_id = item["post_id"]
+        log_id = item["log_id"]
+        main(account_name, post_id, log_id)

File diff suppressed because it is too large
+ 440 - 401
examples_how/overall_derivation/input/家有大志/原始数据/pattern/processed_edge_data.json


+ 4 - 0
examples_how/overall_derivation/pattern_data_process.py

@@ -18,6 +18,10 @@ TOP_KEYS = [
     "depth_max_concrete",
     "depth2_medium",
     "depth1_abstract",
+    "depth_max_minus_1",
+    "depth_max_minus_2",
+    "depth_3",
+    "depth_4",
 ]
 SUB_KEYS = ["two_x", "one_x", "zero_x"]
 

+ 5 - 1
examples_how/overall_derivation/tools/find_pattern.py

@@ -32,6 +32,10 @@ TOP_KEYS = [
     "depth_max_concrete",
     "depth2_medium",
     "depth1_abstract",
+    "depth_max_minus_1",
+    "depth_max_minus_2",
+    "depth_3",
+    "depth_4",
 ]
 SUB_KEYS = ["two_x", "one_x", "zero_x"]
 
@@ -418,7 +422,7 @@ def main() -> None:
     #     {"topic": "叙事结构", "source_node": "叙事结构"},
     # ]
     derived_items = [{"source_node":"分享","topic":"分享"},{"source_node":"叙事结构","topic":"叙事结构"},{"source_node":"图片文字","topic":"图片文字"},{"source_node":"补充说明式","topic":"补充说明式"},{"source_node":"幽默化标题","topic":"幽默化标题"},{"source_node":"标题","topic":"标题"}]
-    conditional_ratio_threshold = 0.01
+    conditional_ratio_threshold = 0.2
     top_n = 2000
 
     # 1) Call the core function directly (no post matching; only verifies the sorting logic)

+ 1 - 1
examples_how/overall_derivation/tools/find_tree_node.py

@@ -344,7 +344,7 @@ def main() -> None:
     #     {"topic": "叙事结构", "source_node": "叙事结构"},
     # ]
     derived_items = [{"source_node":"分享","topic":"分享"},{"source_node":"叙事结构","topic":"叙事结构"},{"source_node":"图片文字","topic":"图片文字"},{"source_node":"补充说明式","topic":"补充说明式"},{"source_node":"幽默化标题","topic":"幽默化标题"},{"source_node":"标题","topic":"标题"}]
-    conditional_ratio_threshold = 0.01
+    conditional_ratio_threshold = 0.2
     top_n = 1000
 
     # # 1) Constant nodes (core function, no matching)

+ 221 - 221
examples_how/overall_derivation/tools/pattern_dimension_analyze.py

@@ -1,15 +1,16 @@
 """
-Pattern dimension clustering analysis tool
+Pattern dimension analysis tool
 
 Overview:
-1. Read each round's evaluation results under a given overall-derivation log directory and accumulate every round's matched_post_point.
-2. Score and classify pattern elements (derived / underived) based on the matches between the post and the pattern library.
-3. In the account persona trees (substance / form / intent), search for cluster nodes for the "derived elements" and the "underived elements" separately
+1. Read each round's evaluation results under a given overall-derivation log directory and accumulate fields such as matched_post_point / derivation_output_point
+2. For each round, use derivation_output_point to find its ancestor at depth cluster_level in the persona tree (the set of derived dimension nodes).
+3. Filter the patterns in deduped_patterns that contain derived dimension nodes, and tag each element as derived or not
 
 Input parameters:
 - account_name: account name
 - post_id: post ID
 - log_id: derivation log directory name (e.g. 20260313210921)
+- cluster_level: target depth when looking up ancestor nodes in the persona tree (root is level 0)
 """
 
 import json
@@ -22,7 +23,6 @@ _root = Path(__file__).resolve().parent.parent
 if str(_root) not in sys.path:
     sys.path.insert(0, str(_root))
 
-from tools.point_match import _load_match_data  # match scores between post topic points and persona-tree nodes
 from tools.find_tree_node import _load_trees    # load the three persona trees
 
 
@@ -58,8 +58,18 @@ def _load_round_matched_points(
     [
       {
         "round": 1,
-        "round_points": [... 本轮 matched_post_point 去重 ...],
-        "cumulative_points": [... 累计到本轮的 matched_post_point 去重 ...],
+        "round_points": [
+          {
+            "matched_post_point": "叙事结构",
+            "derivation_output_point": "叙事编排",
+            "matched_score": 0.9151,
+            "is_fully_derived": true,
+          },
+          ...
+        ],
+        "cumulative_points": [
+          ... cumulative deduped list up to this round (deduped by derivation_output_point) ...
+        ],
       },
       ...
     ]
@@ -83,8 +93,8 @@ def _load_round_matched_points(
 
     eval_files.sort(key=lambda x: x[0])
     results: List[Dict[str, Any]] = []
-    cumulative: List[str] = []
-    cumulative_set: Set[str] = set()
+    cumulative: List[Dict[str, Any]] = []
+    cumulative_set: Set[str] = set()  # deduped by derivation_output_point
 
     for r, path in eval_files:
         try:
@@ -93,34 +103,42 @@ def _load_round_matched_points(
         except Exception:
             continue
         eval_results = data.get("eval_results") or []
-        round_points: List[str] = []
+        round_points: List[Dict[str, Any]] = []
+        seen_in_round: Set[str] = set()
+
         for item in eval_results:
             if not isinstance(item, dict):
                 continue
             if not item.get("is_matched"):
                 continue
 
-            # Choose the post topic-point field based on whether it is fully derived:
-            # - when is_fully_derived is False, use derivation_output_point
-            # - otherwise (True or missing), use matched_post_point (compatible with old data)
-            if item.get("is_fully_derived") is False:
-                mp = item.get("derivation_output_point")
-            else:
-                mp = item.get("matched_post_point")
-
-            if mp is None:
+            dop = item.get("derivation_output_point")
+            if dop is None:
                 continue
-            mp = str(mp).strip()
-            if not mp:
+            dop = str(dop).strip()
+            if not dop:
                 continue
-            if mp not in round_points:
-                round_points.append(mp)
 
-        # accumulate up to this round
-        for mp in round_points:
-            if mp not in cumulative_set:
-                cumulative_set.add(mp)
-                cumulative.append(mp)
+            # dedupe within this round by derivation_output_point
+            if dop in seen_in_round:
+                continue
+            seen_in_round.add(dop)
+
+            mpp = item.get("matched_post_point")
+            entry: Dict[str, Any] = {
+                "matched_post_point": str(mpp).strip() if mpp is not None else None,
+                "derivation_output_point": dop,
+                "matched_score": item.get("matched_score"),
+                "is_fully_derived": item.get("is_fully_derived"),
+            }
+            round_points.append(entry)
+
+        # append to the cumulative list (deduped by derivation_output_point)
+        for entry in round_points:
+            dop = entry["derivation_output_point"]
+            if dop not in cumulative_set:
+                cumulative_set.add(dop)
+                cumulative.append(entry)
 
         results.append(
             {
@@ -207,106 +225,6 @@ def _dedupe_patterns(raw_patterns: List[Dict[str, Any]]) -> List[Dict[str, Any]]
     return list(key_to_best.values())
 
 
-def _score_patterns_by_matched_points(
-    patterns: List[Dict[str, Any]],
-    account_name: str,
-    post_id: str,
-    matched_post_points: List[str],
-    match_threshold: float,
-) -> List[Dict[str, Any]]:
-    """
-    Compute match scores between the elements of each given pattern and the matched_post_point list:
-    - Score source: ../input/{account_name}/match_data/{post_id}_匹配_all.json
-      lookup key is (post topic point, persona-tree node)
-    - For each element (item) of each pattern:
-        * treat item["name"] as a persona-tree node name
-        * look up the match score for every matched_post_point and take the maximum
-    - Keep only patterns where at least one element has a match score >= match_threshold.
-    Returned pattern structure (items no longer keep the point / dimension fields):
-    {
-      "id": xxx,
-      "support": xxx,
-      "items": [
-        {
-          "name": "xxx",
-          "type": "xxx",
-          "matched_post_point": "xxx" | null,
-          "matched_score": float,
-        },
-        ...
-      ],
-    }
-    """
-    if not patterns or not matched_post_points:
-        return []
-
-    match_lookup = _load_match_data(account_name, post_id)
-    matched_post_points = [str(x).strip() for x in matched_post_points if str(x).strip()]
-    if not matched_post_points:
-        return []
-
-    results: List[Dict[str, Any]] = []
-    for p in patterns:
-        items = p.get("items") or []
-        if not isinstance(items, list):
-            continue
-        scored_items: List[Dict[str, Any]] = []
-        max_item_score = 0.0
-
-        for it in items:
-            if not isinstance(it, dict):
-                continue
-            name = str(it.get("name") or "").strip()
-            _type = str(it.get("type") or "").strip()
-
-            best_score = 0.0
-            best_post_point: Optional[str] = None
-            if name:
-                for post_point in matched_post_points:
-                    # a post topic point identical to the node name counts as a perfect match
-                    if post_point == name:
-                        s = 1.0
-                    else:
-                        score = match_lookup.get((post_point, name))
-                        if score is None:
-                            continue
-                        try:
-                            s = float(score)
-                        except (TypeError, ValueError):
-                            continue
-                    if s > best_score:
-                        best_score = s
-                        best_post_point = post_point
-
-            if best_score > max_item_score:
-                max_item_score = best_score
-
-            scored_items.append(
-                {
-                    "name": name,
-                    "type": _type,
-                    "matched_post_point": best_post_point,
-                    "matched_score": round(best_score, 6),
-                }
-            )
-
-        if not scored_items:
-            continue
-        if max_item_score < match_threshold:
-            # this pattern did not match the post strongly enough in this round
-            continue
-
-        results.append(
-            {
-                "id": p.get("id"),
-                "support": p.get("support"),
-                "items": scored_items,
-            }
-        )
-
-    return results
-
-
 # ---------------------------------------------------------------------------
 # 3. Persona-tree node info & cluster-node search
 # ---------------------------------------------------------------------------
@@ -399,6 +317,37 @@ class TreeIndex:
                     self.node_info[child]["depth"] = cur_depth + 1
                     q.append(child)
 
+    def find_ancestor_at_level(self, node_name: str, level: int) -> Optional[str]:
+        """
+        Find the ancestor of node_name whose depth == level in the persona tree.
+        - If node_name itself has depth == level, return node_name directly.
+        - If node_name has depth < level (shallower than the target level), return node_name.
+        - Otherwise walk up the parent chain and return the first ancestor with depth == level.
+        """
+        info = self.node_info.get(node_name)
+        if not info:
+            return None
+        depth = info.get("depth")
+        if depth is None:
+            return None
+        if depth <= level:
+            return node_name
+        cur = node_name
+        visited: Set[str] = set()
+        while cur and cur not in visited:
+            visited.add(cur)
+            cur_info = self.node_info.get(cur) or {}
+            cur_depth = cur_info.get("depth") or 0
+            if cur_depth == level:
+                return cur
+            if cur_depth < level:
+                return cur
+            parent = cur_info.get("parent")
+            if parent is None:
+                return cur
+            cur = parent
+        return None
+
     # cluster search (no longer split by dimension)
     def find_clusters(
         self,
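For orientation, the parent-chain walk in `find_ancestor_at_level` can be traced on a toy tree. This is a standalone re-implementation over a plain dict (the tree shape and the omission of the method's cycle guard are illustrative simplifications), not the `TreeIndex` API:

```python
# Toy trace of the ancestor lookup; the node names echo this commit's sample
# data, but the tree shape here is invented for illustration.
node_info = {
    "root":       {"parent": None,   "depth": 0},
    "形式":       {"parent": "root", "depth": 1},
    "标题":       {"parent": "形式", "depth": 2},
    "幽默化标题": {"parent": "标题", "depth": 3},
}

def find_ancestor_at_level(name: str, level: int) -> str | None:
    cur = name
    while cur is not None:
        info = node_info.get(cur)
        if info is None:
            return None               # unknown node
        if info["depth"] <= level:    # at or above the target depth: stop here
            return cur
        cur = info["parent"]          # walk one step up the parent chain
    return None

assert find_ancestor_at_level("幽默化标题", 2) == "标题"  # depth 3 → ancestor at depth 2
assert find_ancestor_at_level("形式", 2) == "形式"        # shallower than target: itself
```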
@@ -564,95 +513,144 @@ class TreeIndex:
 # ---------------------------------------------------------------------------
 
 def _analyze_single_round(
-    account_name: str,
-    post_id: str,
     patterns: List[Dict[str, Any]],
     tree_index: TreeIndex,
-    cumulative_points: List[str],
-    match_threshold: float,
+    cumulative_points: List[Dict[str, Any]],
     cluster_level: int,
 ) -> Dict[str, Any]:
     """
-    Analyze one round (given the cumulative matched_post_point list):
-    - filter patterns whose match score with the post is >= match_threshold
-    - split pattern elements into "derived" and "underived" by matched_score
-    - search the three persona trees (ignoring dimensions) for cluster nodes of each group
-    """
-    patterns = _score_patterns_by_matched_points(
-        patterns=patterns,
-        account_name=account_name,
-        post_id=post_id,
-        matched_post_points=cumulative_points,
-        match_threshold=match_threshold,
-    )
-    print(f"_score_patterns_by_matched_points len: {len(patterns)}")
+    Run the dimension analysis for one round (given the cumulative point list):
+
+    1. Extract derivation_output_point from cumulative_points and look up each
+       node's ancestor at depth cluster_level in the persona tree → derived_ancestor_set (the set of derived dimension nodes).
+    2. Filter deduped_patterns down to patterns whose elements are at least 50% nodes from derived_ancestor_set (and have at least 5 elements).
+    3. Tag every element of each filtered pattern:
+       - element in derived_ancestor_set → is_derived=True (derived dimension)
+       - otherwise → is_derived=False (underived dimension)
+    4. Aggregate the derived_dims / underived_dims lists.
 
-    # derived / underived element lists (no longer split by dimension)
-    derived_elems: List[str] = []
-    underived_elems: List[str] = []
+    Return structure:
+    {
+      "cumulative_points": [...],          # original cumulative point objects
+      "derived_ancestor_nodes": [...],     # depth-cluster_level ancestors of all derivation_output_points (the derived dimension node set)
+      "patterns": [...],                   # filtered patterns with is_derived tags
+      "derived_dims": [...],               # derived dimension nodes (deduped, appearing in the filtered patterns)
+      "underived_dims": [...],             # underived dimension nodes (deduped, derived nodes excluded)
+      "patterns_count": int,
+      "derived_dim_count": int,
+      "underived_dim_count": int,
+    }
+    """
+    # 1. Collect derived_ancestor_set and record each ancestor node's matched_post_point sources
+    derived_ancestor_set: Set[str] = set()
+    ancestor_to_mpps: Dict[str, List[str]] = {}  # ancestor node -> [matched_post_point, ...]
+    for entry in cumulative_points:
+        dop = entry.get("derivation_output_point")
+        if not dop:
+            continue
+        ancestor = tree_index.find_ancestor_at_level(str(dop).strip(), cluster_level)
+        if not ancestor:
+            continue
+        derived_ancestor_set.add(ancestor)
+        mpp = entry.get("matched_post_point") or ""
+        if mpp and mpp not in ancestor_to_mpps.get(ancestor, []):
+            ancestor_to_mpps.setdefault(ancestor, []).append(mpp)
 
+    # 2. Filter patterns: derived dimension nodes must make up >= 50% of all elements
+    filtered_patterns: List[Dict[str, Any]] = []
     for p in patterns:
-        for it in p.get("items", []):
+        items = p.get("items") or []
+        item_names = [
+            str(it.get("name") or "").strip()
+            for it in items
+            if isinstance(it, dict)
+        ]
+        if not item_names:
+            continue
+        if len(item_names) < 5:
+            continue
+        derived_count = sum(1 for name in item_names if name in derived_ancestor_set)
+        if derived_count / len(item_names) >= 0.5:
+            filtered_patterns.append(p)
+
+    print(
+        f"filtered_patterns: {len(filtered_patterns)}, "
+        f"derived_ancestor_set: {len(derived_ancestor_set)}"
+    )
+
+    def _node_label(name: str, is_derived: bool) -> str:
+        """
+        Return a formatted label:
+        - derived node:   'node_name->dimension(mpp1,mpp2,...)'
+        - underived node: 'node_name->dimension'
+        """
+        dim = (tree_index.node_info.get(name) or {}).get("dimension") or ""
+        base = f"{name}->{dim}" if dim else name
+        if is_derived:
+            mpps = ancestor_to_mpps.get(name) or []
+            if mpps:
+                return f"{base}({','.join(mpps)})"
+        return base
+
+    # 3. Classify the filtered patterns' elements and aggregate the dimension lists
+    derived_dims: List[str] = []
+    underived_dims: List[str] = []
+    derived_dims_seen: Set[str] = set()
+    underived_dims_seen: Set[str] = set()
+
+    scored_patterns: List[Dict[str, Any]] = []
+    for p in filtered_patterns:
+        items = p.get("items") or []
+        tagged_items: List[Dict[str, Any]] = []
+        for it in items:
             if not isinstance(it, dict):
                 continue
-            node_name = str(it.get("name") or "").strip()
-            if not node_name:
-                continue
-            score = float(it.get("matched_score") or 0.0)
-            if score >= match_threshold:
-                derived_elems.append(node_name)
+            name = str(it.get("name") or "").strip()
+            is_derived = name in derived_ancestor_set
+            tagged_items.append(
+                {
+                    "name": name,
+                    "is_derived": is_derived,
+                }
+            )
+            if is_derived:
+                if name and name not in derived_dims_seen:
+                    derived_dims_seen.add(name)
+                    derived_dims.append(_node_label(name, is_derived=True))
             else:
-                underived_elems.append(node_name)
+                if name and name not in underived_dims_seen:
+                    underived_dims_seen.add(name)
+                    underived_dims.append(_node_label(name, is_derived=False))
 
-    # dedupe first so duplicate elements don't skew the stats and clustering
-    derived_set: List[str] = list(dict.fromkeys(derived_elems))
-    underived_set: List[str] = list(dict.fromkeys(underived_elems))
+        scored_patterns.append(
+            {
+                "id": p.get("id"),
+                "support": p.get("support"),
+                "items": tagged_items,
+            }
+        )
 
-    clusters: Dict[str, Any] = {
-        "derived": [],
-        "underived": [],
-    }
+    # drop nodes from underived_dims that overlap with derived_dims
+    underived_dims = [d for d in underived_dims if d.split("->")[0] not in derived_dims_seen]
 
-    # cluster the derived elements
-    if derived_set:
-        c = tree_index.find_clusters(derived_set, cluster_level=cluster_level)
-        clusters["derived"] = c or []
-
-    # cluster the underived elements
-    if underived_set:
-        c = tree_index.find_clusters(underived_set, cluster_level=cluster_level)
-        clusters["underived"] = c or []
-
-    # Within the same round, if a cluster_node already appeared in the derived clusters,
-    # drop it from the underived clusters to avoid showing it twice.
-    if isinstance(clusters.get("derived"), list) and isinstance(clusters.get("underived"), list):
-        derived_nodes = {
-            str(item.get("cluster_node"))
-            for item in clusters["derived"]
-            if isinstance(item, dict) and item.get("cluster_node") is not None
-        }
-        if derived_nodes:
-            filtered_underived = []
-            for item in clusters["underived"]:
-                if not isinstance(item, dict):
-                    continue
-                node = str(item.get("cluster_node"))
-                if node in derived_nodes:
-                    continue
-                filtered_underived.append(item)
-            clusters["underived"] = filtered_underived
+    # sort by the number of is_derived=True elements, descending; break ties by total element count, descending
+    scored_patterns.sort(
+        key=lambda x: (
+            sum(1 for it in x.get("items", []) if it.get("is_derived")),
+            len(x.get("items", [])),
+        ),
+        reverse=True,
+    )
 
     return {
-        "matched_post_points": list(cumulative_points),
-        "patterns": patterns,
-        "clusters": clusters,
-        # Statistics:
-        # - patterns_count: number of patterns analyzed this round
-        # - derived_cluster_count: number of cluster nodes for derived elements
-        # - underived_cluster_count: number of cluster nodes for underived elements
-        "patterns_count": len(patterns),
-        "derived_cluster_count": len(clusters["derived"]) if isinstance(clusters.get("derived"), list) else 0,
-        "underived_cluster_count": len(clusters["underived"]) if isinstance(clusters.get("underived"), list) else 0,
+        "cumulative_points": list(cumulative_points),
+        "derived_ancestor_nodes": sorted(derived_ancestor_set),
+        "patterns": scored_patterns,
+        "derived_dims": derived_dims,
+        "underived_dims": underived_dims,
+        "patterns_count": len(scored_patterns),
+        "derived_dim_count": len(derived_dims),
+        "underived_dim_count": len(underived_dims),
     }
 
 
@@ -661,7 +659,6 @@ def pattern_dimension_analyze(
     account_name: str,
     post_id: str,
     log_id: str,
-    match_threshold: float = 0.6,
     cluster_level: int = 2,
 ) -> Dict[str, Any]:
     """
@@ -670,11 +667,16 @@ def pattern_dimension_analyze(
     Parameters
     -------
     account_name : account name (used to locate the data directories under input / output)
-    post_id : post ID (used to locate the derivation log and post match data)
+    post_id : post ID (used to locate the derivation log)
     log_id : derivation log directory name (../output/{account_name}/推导日志/{post_id}/{log_id}/)
-    match_threshold : minimum match score between a pattern element and a matched_post_point, default 0.6
-    cluster_level : cluster level when searching for cluster nodes in the persona tree (root is level 0), default 2
-
+    cluster_level : target depth when looking up ancestor nodes in the persona tree (root is level 0), default 2
+
+    Logic overview
+    --------
+    Each round:
+    1. From each derivation_output_point, find its depth-cluster_level ancestor in the persona tree → the set of derived dimension nodes.
+    2. Filter to patterns whose elements are at least half derived dimension nodes.
+    3. Tag each pattern element as derived or not, and aggregate derived_dims / underived_dims.
     """
     eval_dir = _round_eval_dir(account_name, post_id, log_id)
     if not eval_dir.is_dir():
@@ -686,7 +688,6 @@ def pattern_dimension_analyze(
             "account_name": account_name,
             "post_id": post_id,
             "log_id": log_id,
-            "match_threshold": match_threshold,
             "cluster_level": cluster_level,
             "rounds": [],
             "message": "未在指定日志目录下找到任何评估结果文件(*_评估.json)",
@@ -703,40 +704,29 @@ def pattern_dimension_analyze(
         r = info["round"]
         cumulative_points = info["cumulative_points"]
         analyzed = _analyze_single_round(
-            account_name=account_name,
-            post_id=post_id,
             patterns=deduped_patterns,
             tree_index=tree_index,
             cumulative_points=cumulative_points,
-            match_threshold=match_threshold,
             cluster_level=cluster_level,
         )
         analyzed["round"] = r
         rounds_output.append(analyzed)
 
-    result = {
+    return {
         "account_name": account_name,
         "post_id": post_id,
         "log_id": log_id,
-        "match_threshold": match_threshold,
         "cluster_level": cluster_level,
         "rounds": rounds_output,
     }
-    return result
 
 
-def main() -> None:
+def main(account_name, post_id, log_id) -> None:
     """本地简单测试:以家有大志账号的一次推导日志做分析,并将结果写入输出目录。"""
-    account_name = "家有大志"
-    post_id = "68fb6a5c000000000302e5de"
-    # update to the latest log_id produced by an actual run
-    log_id = "20260317112639"
-
     result = pattern_dimension_analyze(
         account_name=account_name,
         post_id=post_id,
         log_id=log_id,
-        match_threshold=0.5,
         cluster_level=3,
     )
     # print the first 4000 characters to the console for a quick look
@@ -753,5 +743,15 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
+    account_name = "家有大志"
+
+    items = [
+        {"post_id": "68fb6a5c000000000302e5de", "log_id": "20260317214307"},
+        {"post_id": "69185d49000000000d00f94e", "log_id": "20260317214841"},
+        {"post_id": "6921937a000000001b0278d1", "log_id": "20260317215616"}
+    ]
+    for item in items:
+        post_id = item["post_id"]
+        log_id = item["log_id"]
+        main(account_name, post_id, log_id)
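For a one-off run outside this batch loop, the analyzer can also be called directly. The values below are copied from the batch list above; running from examples_how/overall_derivation/ so that tools/ is importable is an assumption:

```python
from tools.pattern_dimension_analyze import pattern_dimension_analyze

result = pattern_dimension_analyze(
    account_name="家有大志",
    post_id="68fb6a5c000000000302e5de",
    log_id="20260317214307",
    cluster_level=3,  # main() above uses 3; the function default is 2
)
# each round reports how many pattern dimensions are still underived
print([r["underived_dim_count"] for r in result["rounds"]])
```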
 

Some files were not shown because too many files changed in this diff