1 day ago · ea27fad17a
--- a/config/accounts.json
+++ b/config/accounts.json
@@ -2,37 +2,22 @@
 
				   "data_root": "../data",
			
 
				   "accounts": [
			
 
				     {
			
 
				-      "name": "阿里多多酱",
			
 
				+      "name": "阿里多多酱_1125",
			
 
				       "enabled": false,
			
 
				       "description": "旧目录结构（已停用）"
			
 
				     },
			
 
				     {
			
 
				-      "name": "阿里多多酱1",
			
 
				-      "enabled": false,
			
 
				-      "description": "新目录结构（已停用）"
			
 
				-    },
			
 
				-    {
			
 
				-      "name": "阿里多多酱2",
			
 
				-      "enabled": false,
			
 
				-      "description": "新目录结构（已停用）"
			
 
				-    },
			
 
				-    {
			
 
				-      "name": "阿里多多酱3",
			
 
				+      "name": "阿里多多酱_1203",
			
 
				       "enabled": true,
			
 
				-      "description": "新目录结构"
			
 
				+      "description": "新目录结构（1203版本）"
			
 
				     },
			
 
				     {
			
 
				       "name": "摸鱼阿希",
			
 
				-      "enabled": true,
			
 
				-      "description": "新目录结构"
			
 
				-    },
			
 
				-    {
			
 
				-      "name": "示例账号2",
			
 
				       "enabled": false,
			
 
				-      "description": "未启用的示例账号"
			
 
				+      "description": "新目录结构"
			
 
				     }
			
 
				   ],
			
 
				-  "default_account": "阿里多多酱_1125",
			
 
				+  "default_account": "阿里多多酱_1203",
			
 
				   "comment": "数据根目录可通过 data_root 配置（支持绝对路径、~、环境变量），也可通过 DATA_ROOT 环境变量覆盖",
			
 
				   "filter_mode": "exclude_current_posts",
			
 
				   "filter_modes": {
			
@@ -54,7 +39,7 @@
 
				     "input": {
			
 
				       "current_posts": "解构内容/what单独",
			
 
				       "historical_posts": "解构内容/pattern聚类",
			
 
				-      "pattern_cluster": "pattern相关文件/optimization/optimized_clustered_data_gemini-3-pro-preview.json"
			
 
				+      "pattern_cluster": "pattern相关文件/detail/optimized_clustered_data_gemini-3-pro-preview_concurrent.json"
			
 
				     },
			
 
				     "output": {
			
 
				       "intermediate": "how解构_outputs/{version}/intermediate",
			
--- a/script/data_processing/build_persona_graph.py
+++ b/script/data_processing/build_persona_graph.py
@@ -887,8 +887,9 @@ def main():
 
				 
			
 
				     # 输入文件路径
			
 
				     pattern_file = config.pattern_cluster_file
			
 
				-    associations_file = config.account_dir / "pattern相关文件/optimization/dimension_associations_analysis.json"
			
 
				-    intra_associations_file = config.account_dir / "pattern相关文件/optimization/intra_dimension_associations_analysis.json"
			
 
				+    # 使用新的 detail 目录
			
 
				+    associations_file = config.account_dir / "pattern相关文件/detail/dimension_associations_analysis.json"
			
 
				+    intra_associations_file = config.account_dir / "pattern相关文件/detail/intra_dimension_associations_analysis.json"
			
 
				     historical_posts_dir = config.historical_posts_dir
			
 
				 
			
 
				     # 输出文件路径
			
--- a/script/data_processing/extract_current_posts.py
+++ b/script/data_processing/extract_current_posts.py
@@ -1,14 +1,14 @@
 
				 #!/usr/bin/env python3
			
 
				 # -*- coding: utf-8 -*-
			
 
				 """
			
 
				-从当前帖子_what解构结果目录中提取解构任务列表
			
 
				+从当前帖子目录中提取解构任务列表
			
 
				+支持新版数据结构（inspiration_final_result, purpose_final_result, keypoint_final）
			
 
				 """
			
 
				 
			
 
				 import json
			
 
				 from pathlib import Path
			
 
				 from typing import Dict, List, Optional
			
 
				 import sys
			
 
				-import re
			
 
				 
			
 
				 # 添加项目根目录到路径
			
 
				 project_root = Path(__file__).parent.parent.parent
			
@@ -19,11 +19,10 @@ from script.data_processing.path_config import PathConfig
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
 
				-    """从文件名中提取帖子ID"""
			
 
				-    match = re.match(r'^([^_]+)_', filename)
			
 
				-    if match:
			
 
				-        return match.group(1)
			
 
				-    return ""
			
 
				+    """从文件名中提取帖子ID
			
 
				+    格式: 68a6b96f000000001d006058.json
			
 
				+    """
			
 
				+    return filename.replace('.json', '')
			
 
				 
			
 
				 
			
 
				 def get_post_detail(post_id: str) -> Optional[Dict]:
			
@@ -36,148 +35,107 @@ def get_post_detail(post_id: str) -> Optional[Dict]:
 
				         return None
			
 
				 
			
 
				 
			
 
				-def extract_features_from_point(point_data: Dict) -> List[Dict]:
			
 
				-    """
			
 
				-    从点数据中提取特征信息列表（包含名称和权重）
			
 
				-
			
 
				-    Args:
			
 
				-        point_data: 点的数据（包含"提取的特征"字段）
			
 
				-
			
 
				-    Returns:
			
 
				-        特征信息列表，每项包含 {"特征名称": str, "权重": float}
			
 
				-    """
			
 
				-    features = []
			
 
				-    if "提取的特征" in point_data and isinstance(point_data["提取的特征"], list):
			
 
				-        for feature in point_data["提取的特征"]:
			
 
				-            if "特征名称" in feature:
			
 
				-                feature_item = {
			
 
				-                    "特征名称": feature["特征名称"],
			
 
				-                    "权重": feature.get("权重", 1.0)  # 默认权重为1.0
			
 
				-                }
			
 
				-                features.append(feature_item)
			
 
				-    return features
			
 
				-
			
 
				-
			
 
				-def process_inspiration_points(inspiration_data: Dict) -> List[Dict]:
			
 
				-    """
			
 
				-    处理灵感点数据
			
 
				-
			
 
				-    Args:
			
 
				-        inspiration_data: 灵感点数据
			
 
				-
			
 
				-    Returns:
			
 
				-        灵感点列表
			
 
				-    """
			
 
				+def process_inspiration_points(data: Dict) -> List[Dict]:
			
 
				+    """处理灵感点数据"""
			
 
				     result = []
			
 
				-
			
 
				-    # 处理三个维度：全新内容、共性差异、共性内容
			
 
				-    for dimension in ["全新内容", "共性差异", "共性内容"]:
			
 
				-        if dimension in inspiration_data and isinstance(inspiration_data[dimension], list):
			
 
				-            for item in inspiration_data[dimension]:
			
 
				-                point_item = {
			
 
				-                    "名称": item.get("灵感点", ""),
			
 
				-                    "描述": item.get("描述", ""),
			
 
				-                    "特征列表": extract_features_from_point(item)
			
 
				-                }
			
 
				-                result.append(point_item)
			
 
				-
			
 
				+    if "inspiration_final_result" not in data:
			
 
				+        return result
			
 
				+
			
 
				+    inspiration_data = data["inspiration_final_result"]
			
 
				+    for item in inspiration_data.get("最终灵感点列表", []):
			
 
				+        point_item = {
			
 
				+            "ID": item.get("id", ""),
			
 
				+            "名称": item.get("灵感点", ""),
			
 
				+            "类型": item.get("类型", ""),
			
 
				+            "描述": item.get("描述", ""),
			
 
				+            "置信度": item.get("置信度", ""),
			
 
				+            "支撑的ID": [],
			
 
				+            "关联的ID": []
			
 
				+        }
			
 
				+        result.append(point_item)
			
 
				     return result
			
 
				 
			
 
				 
			
 
				-def process_purpose_points(purpose_data: Dict) -> List[Dict]:
			
 
				-    """
			
 
				-    处理目的点数据
			
 
				-
			
 
				-    Args:
			
 
				-        purpose_data: 目的点数据
			
 
				-
			
 
				-    Returns:
			
 
				-        目的点列表
			
 
				-    """
			
 
				+def process_purpose_points(data: Dict) -> List[Dict]:
			
 
				+    """处理目的点数据（意图+实质）"""
			
 
				     result = []
			
 
				-
			
 
				-    if "purposes" in purpose_data and isinstance(purpose_data["purposes"], list):
			
 
				-        for item in purpose_data["purposes"]:
			
 
				-            point_item = {
			
 
				-                "名称": item.get("目的点", ""),
			
 
				-                "描述": item.get("描述", ""),
			
 
				-                "特征列表": extract_features_from_point(item)
			
 
				-            }
			
 
				-            result.append(point_item)
			
 
				+    if "purpose_final_result" not in data:
			
 
				+        return result
			
 
				+
			
 
				+    purpose_data = data["purpose_final_result"]
			
 
				+
			
 
				+    # 处理意图列表
			
 
				+    for item in purpose_data.get("最终意图列表", []):
			
 
				+        point_item = {
			
 
				+            "ID": item.get("意图ID", ""),
			
 
				+            "名称": item.get("目的点", ""),
			
 
				+            "类型": "意图",
			
 
				+            "描述": item.get("描述", ""),
			
 
				+            "置信度": item.get("置信度", ""),
			
 
				+            "支撑的ID": [],
			
 
				+            "关联的ID": []
			
 
				+        }
			
 
				+        result.append(point_item)
			
 
				+
			
 
				+    # 处理实质列表
			
 
				+    for item in purpose_data.get("最终实质列表", []):
			
 
				+        related_id = item.get("关联意图ID", "")
			
 
				+        point_item = {
			
 
				+            "ID": item.get("实质ID", ""),
			
 
				+            "名称": item.get("目的点", ""),
			
 
				+            "类型": "实质",
			
 
				+            "描述": item.get("描述", ""),
			
 
				+            "置信度": item.get("置信度", ""),
			
 
				+            "支撑的ID": [],
			
 
				+            "关联的ID": [related_id] if related_id else []
			
 
				+        }
			
 
				+        result.append(point_item)
			
 
				 
			
 
				     return result
			
 
				 
			
 
				 
			
 
				-def process_key_points(key_data: Dict) -> List[Dict]:
			
 
				-    """
			
 
				-    处理关键点数据
			
 
				-
			
 
				-    Args:
			
 
				-        key_data: 关键点数据
			
 
				-
			
 
				-    Returns:
			
 
				-        关键点列表
			
 
				-    """
			
 
				+def process_key_points(data: Dict) -> List[Dict]:
			
 
				+    """处理关键点数据"""
			
 
				     result = []
			
 
				-
			
 
				-    if "key_points" in key_data and isinstance(key_data["key_points"], list):
			
 
				-        for item in key_data["key_points"]:
			
 
				-            point_item = {
			
 
				-                "名称": item.get("关键点", ""),
			
 
				-                "描述": item.get("描述", ""),
			
 
				-                "特征列表": extract_features_from_point(item)
			
 
				-            }
			
 
				-            result.append(point_item)
			
 
				-
			
 
				+    if "keypoint_final" not in data:
			
 
				+        return result
			
 
				+
			
 
				+    keypoint_data = data["keypoint_final"]
			
 
				+    for item in keypoint_data.get("最终关键点列表", []):
			
 
				+        point_item = {
			
 
				+            "ID": item.get("关键点ID", ""),
			
 
				+            "名称": item.get("关键点", ""),
			
 
				+            "类型": item.get("类型", ""),
			
 
				+            "描述": item.get("描述", ""),
			
 
				+            "置信度": item.get("置信度", ""),
			
 
				+            "支撑的ID": item.get("支撑的ID", []),
			
 
				+            "关联的ID": []
			
 
				+        }
			
 
				+        result.append(point_item)
			
 
				     return result
			
 
				 
			
 
				 
			
 
				 def process_single_file(file_path: Path) -> Optional[Dict]:
			
 
				-    """
			
 
				-    处理单个JSON文件
			
 
				-
			
 
				-    Args:
			
 
				-        file_path: JSON文件路径
			
 
				-
			
 
				-    Returns:
			
 
				-        解构任务字典，如果处理失败则返回None
			
 
				-    """
			
 
				-    # 从文件名提取帖子ID
			
 
				+    """处理单个JSON文件"""
			
 
				     post_id = extract_post_id_from_filename(file_path.name)
			
 
				     if not post_id:
			
 
				         print(f"  警告: 无法从文件名提取帖子ID: {file_path.name}")
			
 
				         return None
			
 
				 
			
 
				     try:
			
 
				-        # 读取文件
			
 
				         with open(file_path, "r", encoding="utf-8") as f:
			
 
				             data = json.load(f)
			
 
				 
			
 
				-        # 获取帖子详情
			
 
				         print(f"  获取帖子 {post_id} 的详情...")
			
 
				         post_detail = get_post_detail(post_id)
			
 
				         if not post_detail:
			
 
				             print(f"  警告: 未能获取帖子 {post_id} 的详情")
			
 
				 
			
 
				-        # 提取三点解构数据
			
 
				-        three_points = data.get("三点解构", {})
			
 
				+        # 提取三点数据
			
 
				+        inspiration_points = process_inspiration_points(data)
			
 
				+        purpose_points = process_purpose_points(data)
			
 
				+        key_points = process_key_points(data)
			
 
				 
			
 
				-        # 处理灵感点
			
 
				-        inspiration_points = []
			
 
				-        if "灵感点" in three_points:
			
 
				-            inspiration_points = process_inspiration_points(three_points["灵感点"])
			
 
				-
			
 
				-        # 处理目的点
			
 
				-        purpose_points = []
			
 
				-        if "目的点" in three_points:
			
 
				-            purpose_points = process_purpose_points(three_points["目的点"])
			
 
				-
			
 
				-        # 处理关键点
			
 
				-        key_points = []
			
 
				-        if "关键点" in three_points:
			
 
				-            key_points = process_key_points(three_points["关键点"])
			
 
				-
			
 
				-        # 构建结果
			
 
				         task_item = {
			
 
				             "帖子id": post_id,
			
 
				             "帖子详情": post_detail if post_detail else {},
			
@@ -196,56 +154,43 @@ def process_single_file(file_path: Path) -> Optional[Dict]:
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # 使用路径配置
			
 
				     config = PathConfig()
			
 
				-
			
 
				-    # 确保输出目录存在
			
 
				     config.ensure_dirs()
			
 
				 
			
 
				-    # 获取路径
			
 
				     input_dir = config.current_posts_dir
			
 
				     output_file = config.task_list_file
			
 
				 
			
 
				     print(f"账号: {config.account_name}")
			
 
				     print(f"当前帖子目录: {input_dir}")
			
 
				     print(f"输出文件: {output_file}")
			
 
				-    print()
			
 
				 
			
 
				-    print(f"正在扫描目录: {input_dir}")
			
 
				+    print(f"\n正在扫描目录: {input_dir}")
			
 
				 
			
 
				-    # 获取所有JSON文件
			
 
				     json_files = list(input_dir.glob("*.json"))
			
 
				-    print(f"找到 {len(json_files)} 个JSON文件\n")
			
 
				+    print(f"找到 {len(json_files)} 个JSON文件")
			
 
				 
			
 
				-    # 处理所有文件
			
 
				     task_list = []
			
 
				     for i, file_path in enumerate(json_files, 1):
			
 
				-        print(f"处理文件 [{i}/{len(json_files)}]: {file_path.name}")
			
 
				+        print(f"\n处理文件 [{i}/{len(json_files)}]: {file_path.name}")
			
 
				         task_item = process_single_file(file_path)
			
 
				         if task_item:
			
 
				             task_list.append(task_item)
			
 
				             print(f"  ✓ 成功提取")
			
 
				-        print()
			
 
				 
			
 
				-    # 构建最终结果
			
 
				-    final_result = {
			
 
				-        "解构任务列表": task_list
			
 
				-    }
			
 
				+    # 统计
			
 
				+    total_inspiration = sum(len(t["what解构结果"]["灵感点列表"]) for t in task_list)
			
 
				+    total_purpose = sum(len(t["what解构结果"]["目的点列表"]) for t in task_list)
			
 
				+    total_key = sum(len(t["what解构结果"]["关键点列表"]) for t in task_list)
			
 
				 
			
 
				-    # 统计信息
			
 
				-    print(f"提取统计:")
			
 
				+    print(f"\n提取统计:")
			
 
				     print(f"  总帖子数: {len(task_list)}")
			
 
				-    total_inspiration = sum(len(task["what解构结果"]["灵感点列表"]) for task in task_list)
			
 
				-    total_purpose = sum(len(task["what解构结果"]["目的点列表"]) for task in task_list)
			
 
				-    total_key = sum(len(task["what解构结果"]["关键点列表"]) for task in task_list)
			
 
				     print(f"  总灵感点: {total_inspiration} 个")
			
 
				     print(f"  总目的点: {total_purpose} 个")
			
 
				     print(f"  总关键点: {total_key} 个")
			
 
				 
			
 
				-    # 保存结果
			
 
				     print(f"\n正在保存结果到: {output_file}")
			
 
				     with open(output_file, "w", encoding="utf-8") as f:
			
 
				-        json.dump(final_result, f, ensure_ascii=False, indent=4)
			
 
				+        json.dump(task_list, f, ensure_ascii=False, indent=2)
			
 
				 
			
 
				     print("完成!")
			
 
				 
			
--- a/script/data_processing/extract_feature_categories.py
+++ b/script/data_processing/extract_feature_categories.py
@@ -19,11 +19,10 @@ from script.data_processing.path_config import PathConfig
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
 
				-    """从文件名中提取帖子ID"""
			
 
				-    match = re.match(r'^([^_]+)_', filename)
			
 
				-    if match:
			
 
				-        return match.group(1)
			
 
				-    return ""
			
 
				+    """从文件名中提取帖子ID
			
 
				+    格式: 68a6b96f000000001d006058.json
			
 
				+    """
			
 
				+    return filename.replace('.json', '')
			
 
				 
			
 
				 
			
 
				 def get_post_detail(post_id: str) -> Optional[Dict]:
			
--- a/script/data_processing/extract_features_from_posts.py
+++ b/script/data_processing/extract_features_from_posts.py
@@ -1,7 +1,8 @@
 
				 #!/usr/bin/env python3
			
 
				 # -*- coding: utf-8 -*-
			
 
				 """
			
 
				-从过去帖子_what解构结果目录中提取特征名称及其来源信息
			
 
				+从过去帖子解构结果目录中提取特征名称及其来源信息
			
 
				+仅支持新版数据结构（inspiration_final_result, purpose_final_result, keypoint_final）
			
 
				 """
			
 
				 
			
 
				 import json
			
@@ -19,23 +20,14 @@ from script.data_processing.path_config import PathConfig
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
 
				-    """从文件名中提取帖子ID"""
			
 
				-    match = re.match(r'^([^_]+)_', filename)
			
 
				-    if match:
			
 
				-        return match.group(1)
			
 
				-    return ""
			
 
				-
			
 
				-
			
 
				-def get_post_detail(post_id: str) -> Optional[Dict]:
			
 
				+    """从文件名中提取帖子ID
			
 
				+    支持格式: 68a6b96f000000001d006058.json
			
 
				     """
			
 
				-    获取帖子详情
			
 
				+    return filename.replace('.json', '')
			
 
				 
			
 
				-    Args:
			
 
				-        post_id: 帖子ID
			
 
				 
			
 
				-    Returns:
			
 
				-        帖子详情字典，如果获取失败则返回None
			
 
				-    """
			
 
				+def get_post_detail(post_id: str) -> Optional[Dict]:
			
 
				+    """获取帖子详情"""
			
 
				     try:
			
 
				         detail = get_xiaohongshu_detail(post_id)
			
 
				         return detail
			
@@ -44,35 +36,6 @@ def get_post_detail(post_id: str) -> Optional[Dict]:
 
				         return None
			
 
				 
			
 
				 
			
 
				-def extract_features_from_point(point_data: Dict, post_id: str, point_name: str, point_description: str) -> List[Dict]:
			
 
				-    """
			
 
				-    从单个点（灵感点/目的点/关键点）中提取特征信息
			
 
				-
			
 
				-    Args:
			
 
				-        point_data: 点的数据
			
 
				-        post_id: 帖子ID
			
 
				-        point_name: 点的名称
			
 
				-        point_description: 点的描述
			
 
				-
			
 
				-    Returns:
			
 
				-        特征列表
			
 
				-    """
			
 
				-    features = []
			
 
				-
			
 
				-    # 检查是否有"提取的特征"字段
			
 
				-    if "提取的特征" in point_data and isinstance(point_data["提取的特征"], list):
			
 
				-        for feature in point_data["提取的特征"]:
			
 
				-            if "特征名称" in feature:
			
 
				-                features.append({
			
 
				-                    "特征名称": feature["特征名称"],
			
 
				-                    "点的名称": point_name,
			
 
				-                    "点的描述": point_description,
			
 
				-                    "帖子id": post_id
			
 
				-                })
			
 
				-
			
 
				-    return features
			
 
				-
			
 
				-
			
 
				 def process_single_file(file_path: Path) -> Dict[str, Dict[str, List[Dict]]]:
			
 
				     """
			
 
				     处理单个JSON文件，提取所有特征信息
			
@@ -89,113 +52,81 @@ def process_single_file(file_path: Path) -> Dict[str, Dict[str, List[Dict]]]:
 
				         "关键点": {}
			
 
				     }
			
 
				 
			
 
				-    # 从文件名提取帖子ID
			
 
				     post_id = extract_post_id_from_filename(file_path.name)
			
 
				 
			
 
				     try:
			
 
				         with open(file_path, "r", encoding="utf-8") as f:
			
 
				             data = json.load(f)
			
 
				 
			
 
				-        # 提取三点解构数据
			
 
				-        if "三点解构" not in data:
			
 
				-            return result
			
 
				+        # 处理灵感点
			
 
				+        if "inspiration_final_result" in data:
			
 
				+            inspiration_data = data["inspiration_final_result"]
			
 
				+            for item in inspiration_data.get("最终灵感点列表", []):
			
 
				+                feature_name = item.get("灵感点", "")
			
 
				+                if not feature_name:
			
 
				+                    continue
			
 
				+                if feature_name not in result["灵感点"]:
			
 
				+                    result["灵感点"][feature_name] = []
			
 
				+                result["灵感点"][feature_name].append({
			
 
				+                    "点的名称": feature_name,
			
 
				+                    "点的描述": item.get("描述", ""),
			
 
				+                    "帖子id": post_id,
			
 
				+                    "点ID": item.get("id", ""),
			
 
				+                    "类型": item.get("类型", "")
			
 
				+                })
			
 
				+
			
 
				+        # 处理目的点（意图+实质）
			
 
				+        if "purpose_final_result" in data:
			
 
				+            purpose_data = data["purpose_final_result"]
			
 
				 
			
 
				-        three_points = data["三点解构"]
			
 
				+            # 处理意图列表
			
 
				+            for item in purpose_data.get("最终意图列表", []):
			
 
				+                feature_name = item.get("目的点", "")
			
 
				+                if not feature_name:
			
 
				+                    continue
			
 
				+                if feature_name not in result["目的点"]:
			
 
				+                    result["目的点"][feature_name] = []
			
 
				+                result["目的点"][feature_name].append({
			
 
				+                    "点的名称": feature_name,
			
 
				+                    "点的描述": item.get("描述", ""),
			
 
				+                    "帖子id": post_id,
			
 
				+                    "点ID": item.get("意图ID", ""),
			
 
				+                    "类型": "意图"
			
 
				+                })
			
 
				 
			
 
				-        # 处理灵感点
			
 
				-        if "灵感点" in three_points:
			
 
				-            inspiration = three_points["灵感点"]
			
 
				-
			
 
				-            # 处理全新内容
			
 
				-            if "全新内容" in inspiration and isinstance(inspiration["全新内容"], list):
			
 
				-                for item in inspiration["全新内容"]:
			
 
				-                    point_name = item.get("灵感点", "")
			
 
				-                    point_desc = item.get("描述", "")
			
 
				-                    features = extract_features_from_point(item, post_id, point_name, point_desc)
			
 
				-
			
 
				-                    for feature in features:
			
 
				-                        feature_name = feature["特征名称"]
			
 
				-                        if feature_name not in result["灵感点"]:
			
 
				-                            result["灵感点"][feature_name] = []
			
 
				-                        result["灵感点"][feature_name].append({
			
 
				-                            "点的名称": feature["点的名称"],
			
 
				-                            "点的描述": feature["点的描述"],
			
 
				-                            "帖子id": feature["帖子id"]
			
 
				-                        })
			
 
				-
			
 
				-            # 处理共性差异
			
 
				-            if "共性差异" in inspiration and isinstance(inspiration["共性差异"], list):
			
 
				-                for item in inspiration["共性差异"]:
			
 
				-                    point_name = item.get("灵感点", "")
			
 
				-                    point_desc = item.get("描述", "")
			
 
				-                    features = extract_features_from_point(item, post_id, point_name, point_desc)
			
 
				-
			
 
				-                    for feature in features:
			
 
				-                        feature_name = feature["特征名称"]
			
 
				-                        if feature_name not in result["灵感点"]:
			
 
				-                            result["灵感点"][feature_name] = []
			
 
				-                        result["灵感点"][feature_name].append({
			
 
				-                            "点的名称": feature["点的名称"],
			
 
				-                            "点的描述": feature["点的描述"],
			
 
				-                            "帖子id": feature["帖子id"]
			
 
				-                        })
			
 
				-
			
 
				-            # 处理共性内容
			
 
				-            if "共性内容" in inspiration and isinstance(inspiration["共性内容"], list):
			
 
				-                for item in inspiration["共性内容"]:
			
 
				-                    point_name = item.get("灵感点", "")
			
 
				-                    point_desc = item.get("描述", "")
			
 
				-                    features = extract_features_from_point(item, post_id, point_name, point_desc)
			
 
				-
			
 
				-                    for feature in features:
			
 
				-                        feature_name = feature["特征名称"]
			
 
				-                        if feature_name not in result["灵感点"]:
			
 
				-                            result["灵感点"][feature_name] = []
			
 
				-                        result["灵感点"][feature_name].append({
			
 
				-                            "点的名称": feature["点的名称"],
			
 
				-                            "点的描述": feature["点的描述"],
			
 
				-                            "帖子id": feature["帖子id"]
			
 
				-                        })
			
 
				-
			
 
				-        # 处理目的点
			
 
				-        if "目的点" in three_points:
			
 
				-            purpose = three_points["目的点"]
			
 
				-
			
 
				-            if "purposes" in purpose and isinstance(purpose["purposes"], list):
			
 
				-                for item in purpose["purposes"]:
			
 
				-                    point_name = item.get("目的点", "")
			
 
				-                    point_desc = item.get("描述", "")
			
 
				-                    features = extract_features_from_point(item, post_id, point_name, point_desc)
			
 
				-
			
 
				-                    for feature in features:
			
 
				-                        feature_name = feature["特征名称"]
			
 
				-                        if feature_name not in result["目的点"]:
			
 
				-                            result["目的点"][feature_name] = []
			
 
				-                        result["目的点"][feature_name].append({
			
 
				-                            "点的名称": feature["点的名称"],
			
 
				-                            "点的描述": feature["点的描述"],
			
 
				-                            "帖子id": feature["帖子id"]
			
 
				-                        })
			
 
				+            # 处理实质列表
			
 
				+            for item in purpose_data.get("最终实质列表", []):
			
 
				+                feature_name = item.get("目的点", "")
			
 
				+                if not feature_name:
			
 
				+                    continue
			
 
				+                if feature_name not in result["目的点"]:
			
 
				+                    result["目的点"][feature_name] = []
			
 
				+                result["目的点"][feature_name].append({
			
 
				+                    "点的名称": feature_name,
			
 
				+                    "点的描述": item.get("描述", ""),
			
 
				+                    "帖子id": post_id,
			
 
				+                    "点ID": item.get("实质ID", ""),
			
 
				+                    "类型": "实质",
			
 
				+                    "关联意图ID": item.get("关联意图ID", "")
			
 
				+                })
			
 
				 
			
 
				         # 处理关键点
			
 
				-        if "关键点" in three_points:
			
 
				-            key_points = three_points["关键点"]
			
 
				-
			
 
				-            if "key_points" in key_points and isinstance(key_points["key_points"], list):
			
 
				-                for item in key_points["key_points"]:
			
 
				-                    point_name = item.get("关键点", "")
			
 
				-                    point_desc = item.get("描述", "")
			
 
				-                    features = extract_features_from_point(item, post_id, point_name, point_desc)
			
 
				-
			
 
				-                    for feature in features:
			
 
				-                        feature_name = feature["特征名称"]
			
 
				-                        if feature_name not in result["关键点"]:
			
 
				-                            result["关键点"][feature_name] = []
			
 
				-                        result["关键点"][feature_name].append({
			
 
				-                            "点的名称": feature["点的名称"],
			
 
				-                            "点的描述": feature["点的描述"],
			
 
				-                            "帖子id": feature["帖子id"]
			
 
				-                        })
			
 
				+        if "keypoint_final" in data:
			
 
				+            keypoint_data = data["keypoint_final"]
			
 
				+            for item in keypoint_data.get("最终关键点列表", []):
			
 
				+                feature_name = item.get("关键点", "")
			
 
				+                if not feature_name:
			
 
				+                    continue
			
 
				+                if feature_name not in result["关键点"]:
			
 
				+                    result["关键点"][feature_name] = []
			
 
				+                result["关键点"][feature_name].append({
			
 
				+                    "点的名称": feature_name,
			
 
				+                    "点的描述": item.get("描述", ""),
			
 
				+                    "帖子id": post_id,
			
 
				+                    "点ID": item.get("关键点ID", ""),
			
 
				+                    "类型": item.get("类型", ""),
			
 
				+                    "支撑的ID": item.get("支撑的ID", [])
			
 
				+                })
			
 
				 
			
 
				     except Exception as e:
			
 
				         print(f"处理文件 {file_path.name} 时出错: {e}")
			
@@ -204,15 +135,7 @@ def process_single_file(file_path: Path) -> Dict[str, Dict[str, List[Dict]]]:
 
				 
			
 
				 
			
 
				 def merge_results(all_results: List[Dict]) -> Dict:
			
 
				-    """
			
 
				-    合并所有文件的提取结果
			
 
				-
			
 
				-    Args:
			
 
				-        all_results: 所有文件的结果列表
			
 
				-
			
 
				-    Returns:
			
 
				-        合并后的结果
			
 
				-    """
			
 
				+    """合并所有文件的提取结果"""
			
 
				     merged = {
			
 
				         "灵感点": {},
			
 
				         "目的点": {},
			
@@ -232,21 +155,9 @@ def merge_results(all_results: List[Dict]) -> Dict:
 
				 def convert_to_array_format(
			
 
				     merged_dict: Dict,
			
 
				     fetch_details: bool = True,
			
 
				-    time_filter: Optional[str] = None,
			
 
				     exclude_post_ids: Optional[Set[str]] = None
			
 
				 ) -> Dict:
			
 
				-    """
			
 
				-    将字典格式转换为数组格式，并添加帖子详情
			
 
				-
			
 
				-    Args:
			
 
				-        merged_dict: 字典格式的结果
			
 
				-        fetch_details: 是否获取帖子详情，默认为True
			
 
				-        time_filter: 时间过滤阈值，只保留发布时间<该时间的帖子，格式为 "YYYY-MM-DD HH:MM:SS"
			
 
				-        exclude_post_ids: 要排除的帖子ID集合
			
 
				-
			
 
				-    Returns:
			
 
				-        数组格式的结果
			
 
				-    """
			
 
				+    """将字典格式转换为数组格式，并添加帖子详情"""
			
 
				     result = {
			
 
				         "灵感点": [],
			
 
				         "目的点": [],
			
@@ -272,10 +183,7 @@ def convert_to_array_format(
 
				 
			
 
				         print(f"成功获取 {len(post_details)} 个帖子详情")
			
 
				 
			
 
				-        # 应用过滤规则
			
 
				-        filtered_count = 0
			
 
				-
			
 
				-        # 1. 如果启用帖子ID过滤
			
 
				+        # 应用帖子ID过滤
			
 
				         if exclude_post_ids:
			
 
				             print(f"\n正在应用帖子ID过滤，排除 {len(exclude_post_ids)} 个当前帖子...")
			
 
				             before_count = len(post_details)
			
@@ -285,38 +193,18 @@ def convert_to_array_format(
 
				                 print(f"  ⚠️  过滤掉 {filtered_count} 个当前帖子")
			
 
				             print(f"保留 {len(post_details)} 个帖子")
			
 
				 
			
 
				-        # 2. 如果启用时间过滤（过滤掉发布时间晚于等于阈值的帖子，避免穿越）
			
 
				-        elif time_filter:
			
 
				-            print(f"\n正在应用时间过滤 (< {time_filter})，避免使用晚于当前帖子的数据...")
			
 
				-            filtered_post_ids = set()
			
 
				-            for post_id, detail in post_details.items():
			
 
				-                publish_time = detail.get('publish_time', '')
			
 
				-                if publish_time < time_filter:
			
 
				-                    filtered_post_ids.add(post_id)
			
 
				-                else:
			
 
				-                    filtered_count += 1
			
 
				-                    print(f"  ⚠️  过滤掉帖子 {post_id} (发布时间: {publish_time}，晚于阈值)")
			
 
				-
			
 
				-            print(f"过滤掉 {filtered_count} 个帖子（穿越），保留 {len(filtered_post_ids)} 个帖子")
			
 
				-            # 更新post_details，只保留符合时间条件的
			
 
				-            post_details = {pid: detail for pid, detail in post_details.items() if pid in filtered_post_ids}
			
 
				-
			
 
				     # 转换为数组格式并添加帖子详情
			
 
				     for category in ["灵感点", "目的点", "关键点"]:
			
 
				         for feature_name, data in merged_dict[category].items():
			
 
				-            # 为每个来源添加帖子详情
			
 
				             enhanced_sources = []
			
 
				             for source in data["来源"]:
			
 
				-                # 如果启用过滤，跳过不符合条件的帖子
			
 
				-                if fetch_details and (time_filter or exclude_post_ids) and source["帖子id"] not in post_details:
			
 
				+                if fetch_details and exclude_post_ids and source["帖子id"] not in post_details:
			
 
				                     continue
			
 
				-
			
 
				                 enhanced_source = source.copy()
			
 
				                 if fetch_details and source["帖子id"] in post_details:
			
 
				                     enhanced_source["帖子详情"] = post_details[source["帖子id"]]
			
 
				                 enhanced_sources.append(enhanced_source)
			
 
				 
			
 
				-            # 只添加有来源的特征
			
 
				             if enhanced_sources:
			
 
				                 result[category].append({
			
 
				                     "特征名称": feature_name,
			
@@ -327,15 +215,7 @@ def convert_to_array_format(
 
				 
			
 
				 
			
 
				 def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
			
 
				-    """
			
 
				-    获取当前帖子目录中的所有帖子ID
			
 
				-
			
 
				-    Args:
			
 
				-        current_posts_dir: 当前帖子目录路径
			
 
				-
			
 
				-    Returns:
			
 
				-        当前帖子ID集合
			
 
				-    """
			
 
				+    """获取当前帖子目录中的所有帖子ID"""
			
 
				     if not current_posts_dir.exists():
			
 
				         print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
			
 
				         return set()
			
@@ -358,60 +238,10 @@ def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
 
				     return post_ids
			
 
				 
			
 
				 
			
 
				-def get_earliest_publish_time(current_posts_dir: Path) -> Optional[str]:
			
 
				-    """
			
 
				-    获取当前帖子目录中最早的发布时间
			
 
				-
			
 
				-    Args:
			
 
				-        current_posts_dir: 当前帖子目录路径
			
 
				-
			
 
				-    Returns:
			
 
				-        最早的发布时间字符串，格式为 "YYYY-MM-DD HH:MM:SS"
			
 
				-    """
			
 
				-    if not current_posts_dir.exists():
			
 
				-        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
			
 
				-        return None
			
 
				-
			
 
				-    json_files = list(current_posts_dir.glob("*.json"))
			
 
				-    if not json_files:
			
 
				-        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
			
 
				-        return None
			
 
				-
			
 
				-    print(f"\n正在获取当前帖子的发布时间...")
			
 
				-    print(f"找到 {len(json_files)} 个当前帖子")
			
 
				-
			
 
				-    earliest_time = None
			
 
				-    for file_path in json_files:
			
 
				-        post_id = extract_post_id_from_filename(file_path.name)
			
 
				-        if not post_id:
			
 
				-            continue
			
 
				-
			
 
				-        try:
			
 
				-            detail = get_post_detail(post_id)
			
 
				-            if detail and 'publish_time' in detail:
			
 
				-                publish_time = detail['publish_time']
			
 
				-                if earliest_time is None or publish_time < earliest_time:
			
 
				-                    earliest_time = publish_time
			
 
				-                    print(f"  更新最早时间: {publish_time} (帖子: {post_id})")
			
 
				-        except Exception as e:
			
 
				-            print(f"  警告: 获取帖子 {post_id} 发布时间失败: {e}")
			
 
				-
			
 
				-    if earliest_time:
			
 
				-        print(f"\n当前帖子最早发布时间: {earliest_time}")
			
 
				-    else:
			
 
				-        print("\n警告: 未能获取到任何当前帖子的发布时间")
			
 
				-
			
 
				-    return earliest_time
			
 
				-
			
 
				-
			
 
				 def main():
			
 
				-    # 使用路径配置
			
 
				     config = PathConfig()
			
 
				-
			
 
				-    # 确保输出目录存在
			
 
				     config.ensure_dirs()
			
 
				 
			
 
				-    # 获取路径
			
 
				     input_dir = config.historical_posts_dir
			
 
				     current_posts_dir = config.current_posts_dir
			
 
				     output_file = config.feature_source_mapping_file
			
@@ -425,56 +255,39 @@ def main():
 
				 
			
 
				     print(f"\n正在扫描目录: {input_dir}")
			
 
				 
			
 
				-    # 获取所有JSON文件
			
 
				     json_files = list(input_dir.glob("*.json"))
			
 
				     print(f"找到 {len(json_files)} 个JSON文件")
			
 
				 
			
 
				-    # 处理所有文件
			
 
				     all_results = []
			
 
				     for i, file_path in enumerate(json_files, 1):
			
 
				         print(f"处理文件 [{i}/{len(json_files)}]: {file_path.name}")
			
 
				         result = process_single_file(file_path)
			
 
				         all_results.append(result)
			
 
				 
			
 
				-    # 合并结果
			
 
				     print("\n正在合并结果...")
			
 
				     merged_result = merge_results(all_results)
			
 
				 
			
 
				-    # 根据配置的过滤模式应用过滤
			
 
				-    filter_mode = config.filter_mode
			
 
				-    time_filter = None
			
 
				+    # 过滤当前帖子
			
 
				     exclude_post_ids = None
			
 
				-
			
 
				-    if filter_mode == "exclude_current_posts":
			
 
				-        # 新规则：排除当前帖子ID
			
 
				+    if config.filter_mode == "exclude_current_posts":
			
 
				         print("\n应用过滤规则: 排除当前帖子ID")
			
 
				         exclude_post_ids = get_current_post_ids(current_posts_dir)
			
 
				-    elif filter_mode == "time_based":
			
 
				-        # 旧规则：基于发布时间
			
 
				-        print("\n应用过滤规则: 基于发布时间")
			
 
				-        time_filter = get_earliest_publish_time(current_posts_dir)
			
 
				-    elif filter_mode == "none":
			
 
				+    elif config.filter_mode == "none":
			
 
				         print("\n过滤模式: none，不应用任何过滤")
			
 
				-    else:
			
 
				-        print(f"\n警告: 未知的过滤模式 '{filter_mode}'，不应用过滤")
			
 
				 
			
 
				-    # 转换为数组格式（带过滤）
			
 
				     print("正在转换为数组格式...")
			
 
				     final_result = convert_to_array_format(
			
 
				         merged_result,
			
 
				         fetch_details=True,
			
 
				-        time_filter=time_filter,
			
 
				         exclude_post_ids=exclude_post_ids
			
 
				     )
			
 
				 
			
 
				-    # 统计信息
			
 
				     print(f"\n提取统计:")
			
 
				     for category in ["灵感点", "目的点", "关键点"]:
			
 
				         feature_count = len(final_result[category])
			
 
				         source_count = sum(len(item["特征来源"]) for item in final_result[category])
			
 
				         print(f"  {category}: {feature_count} 个特征, {source_count} 个来源")
			
 
				 
			
 
				-    # 保存结果
			
 
				     print(f"\n正在保存结果到: {output_file}")
			
 
				     with open(output_file, "w", encoding="utf-8") as f:
			
 
				         json.dump(final_result, f, ensure_ascii=False, indent=4)
			
--- a/script/data_processing/extract_nodes_and_edges.py
+++ b/script/data_processing/extract_nodes_and_edges.py
@@ -58,11 +58,10 @@ def build_node_id(dimension: str, node_type: str, name: str) -> str:
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
 
				-    """从文件名中提取帖子ID"""
			
 
				-    match = re.match(r'^([^_]+)_', filename)
			
 
				-    if match:
			
 
				-        return match.group(1)
			
 
				-    return ""
			
 
				+    """从文件名中提取帖子ID
			
 
				+    格式: 68a6b96f000000001d006058.json
			
 
				+    """
			
 
				+    return filename.replace('.json', '')
			
 
				 
			
 
				 
			
 
				 def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
			
@@ -111,11 +110,17 @@ def collect_all_post_ids_from_edges(edges: List[Dict]) -> Set[str]:
 
				     """从边列表中收集所有帖子ID"""
			
 
				     post_ids = set()
			
 
				     for edge in edges:
			
 
				-        if edge.get("边类型") in ("分类共现（跨点）", "标签共现"):
			
 
				-            edge_details = edge.get("边详情", {})
			
 
				+        edge_type = edge.get("边类型", "")
			
 
				+        edge_details = edge.get("边详情", {})
			
 
				+
			
 
				+        if edge_type in ("分类共现（跨点）", "标签共现"):
			
 
				             common_post_ids = edge_details.get("共同帖子ID", [])
			
 
				             post_ids.update(common_post_ids)
			
 
				-        # 点内共现边不包含帖子ID
			
 
				+        elif edge_type in ("支撑", "关联意图"):
			
 
				+            # 新边类型使用帖子ID列表
			
 
				+            post_id_list = edge_details.get("帖子ID列表", [])
			
 
				+            post_ids.update(post_id_list)
			
 
				+        # 点内共现边、属于边、包含边不包含帖子ID
			
 
				     return post_ids
			
 
				 
			
 
				 
			
@@ -468,44 +473,33 @@ def extract_tags_from_post(post_data: Dict) -> Dict[str, List[str]]:
 
				         "关键点": []
			
 
				     }
			
 
				 
			
 
				-    if "三点解构" not in post_data:
			
 
				-        return tags_by_dimension
			
 
				-
			
 
				-    three_points = post_data["三点解构"]
			
 
				-
			
 
				-    # 提取灵感点的特征
			
 
				-    if "灵感点" in three_points:
			
 
				-        inspiration = three_points["灵感点"]
			
 
				-        for section in ["全新内容", "共性差异", "共性内容"]:
			
 
				-            if section in inspiration and isinstance(inspiration[section], list):
			
 
				-                for item in inspiration[section]:
			
 
				-                    if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				-                        for feature in item["提取的特征"]:
			
 
				-                            tag_name = feature.get("特征名称", "")
			
 
				-                            if tag_name:
			
 
				-                                tags_by_dimension["灵感点"].append(tag_name)
			
 
				-
			
 
				-    # 提取目的点的特征
			
 
				-    if "目的点" in three_points:
			
 
				-        purpose = three_points["目的点"]
			
 
				-        if "purposes" in purpose and isinstance(purpose["purposes"], list):
			
 
				-            for item in purpose["purposes"]:
			
 
				-                if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				-                    for feature in item["提取的特征"]:
			
 
				-                        tag_name = feature.get("特征名称", "")
			
 
				-                        if tag_name:
			
 
				-                            tags_by_dimension["目的点"].append(tag_name)
			
 
				-
			
 
				-    # 提取关键点的特征
			
 
				-    if "关键点" in three_points:
			
 
				-        key_points = three_points["关键点"]
			
 
				-        if "key_points" in key_points and isinstance(key_points["key_points"], list):
			
 
				-            for item in key_points["key_points"]:
			
 
				-                if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				-                    for feature in item["提取的特征"]:
			
 
				-                        tag_name = feature.get("特征名称", "")
			
 
				-                        if tag_name:
			
 
				-                            tags_by_dimension["关键点"].append(tag_name)
			
 
				+    # 提取灵感点
			
 
				+    if "inspiration_final_result" in post_data:
			
 
				+        inspiration_data = post_data["inspiration_final_result"]
			
 
				+        for item in inspiration_data.get("最终灵感点列表", []):
			
 
				+            tag_name = item.get("灵感点", "")
			
 
				+            if tag_name:
			
 
				+                tags_by_dimension["灵感点"].append(tag_name)
			
 
				+
			
 
				+    # 提取目的点（意图+实质）
			
 
				+    if "purpose_final_result" in post_data:
			
 
				+        purpose_data = post_data["purpose_final_result"]
			
 
				+        for item in purpose_data.get("最终意图列表", []):
			
 
				+            tag_name = item.get("目的点", "")
			
 
				+            if tag_name:
			
 
				+                tags_by_dimension["目的点"].append(tag_name)
			
 
				+        for item in purpose_data.get("最终实质列表", []):
			
 
				+            tag_name = item.get("目的点", "")
			
 
				+            if tag_name:
			
 
				+                tags_by_dimension["目的点"].append(tag_name)
			
 
				+
			
 
				+    # 提取关键点
			
 
				+    if "keypoint_final" in post_data:
			
 
				+        keypoint_data = post_data["keypoint_final"]
			
 
				+        for item in keypoint_data.get("最终关键点列表", []):
			
 
				+            tag_name = item.get("关键点", "")
			
 
				+            if tag_name:
			
 
				+                tags_by_dimension["关键点"].append(tag_name)
			
 
				 
			
 
				     return tags_by_dimension
			
 
				 
			
@@ -596,6 +590,173 @@ def extract_tag_cooccurrence_edges(historical_posts_dir: Path, exclude_post_ids:
 
				     return edges
			
 
				 
			
 
				 
			
 
				+# ========== 支撑边和关联意图边提取（新版数据结构）==========
			
 
				+
			
 
				+def extract_support_and_intent_edges(historical_posts_dir: Path, exclude_post_ids: Set[str] = None) -> tuple[List[Dict], List[Dict]]:
			
 
				+    """
			
 
				+    从历史帖子解构结果中提取支撑边和关联意图边（仅新版数据结构）
			
 
				+
			
 
				+    支撑边：关键点 -> 灵感点/意图/实质
			
 
				+    关联意图边：实质 -> 意图
			
 
				+
			
 
				+    Args:
			
 
				+        historical_posts_dir: 历史帖子解构结果目录
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        (支撑边列表, 关联意图边列表)
			
 
				+    """
			
 
				+    if exclude_post_ids is None:
			
 
				+        exclude_post_ids = set()
			
 
				+
			
 
				+    support_edges = []  # 支撑边
			
 
				+    intent_edges = []   # 关联意图边
			
 
				+    seen_support_edges = set()
			
 
				+    seen_intent_edges = set()
			
 
				+
			
 
				+    if not historical_posts_dir.exists():
			
 
				+        print(f"警告: 历史帖子目录不存在: {historical_posts_dir}")
			
 
				+        return [], []
			
 
				+
			
 
				+    json_files = list(historical_posts_dir.glob("*.json"))
			
 
				+    print(f"找到 {len(json_files)} 个历史帖子文件")
			
 
				+
			
 
				+    for file_path in json_files:
			
 
				+        # 提取帖子ID
			
 
				+        post_id = extract_post_id_from_filename(file_path.name)
			
 
				+        if not post_id:
			
 
				+            post_id = file_path.stem
			
 
				+
			
 
				+        # 跳过排除的帖子
			
 
				+        if post_id in exclude_post_ids:
			
 
				+            continue
			
 
				+
			
 
				+        try:
			
 
				+            with open(file_path, "r", encoding="utf-8") as f:
			
 
				+                post_data = json.load(f)
			
 
				+
			
 
				+            # 只处理新版数据结构
			
 
				+            if "keypoint_final" not in post_data and "purpose_final_result" not in post_data:
			
 
				+                continue
			
 
				+
			
 
				+            # 构建帖子内的ID到名称映射
			
 
				+            id_to_name = {}
			
 
				+            id_to_type = {}  # 记录ID对应的类型（灵感点/意图/实质）
			
 
				+
			
 
				+            # 收集灵感点ID
			
 
				+            if "inspiration_final_result" in post_data:
			
 
				+                for item in post_data["inspiration_final_result"].get("最终灵感点列表", []):
			
 
				+                    item_id = item.get("id", "")
			
 
				+                    item_name = item.get("灵感点", "")
			
 
				+                    if item_id and item_name:
			
 
				+                        id_to_name[item_id] = item_name
			
 
				+                        id_to_type[item_id] = "灵感点"
			
 
				+
			
 
				+            # 收集意图和实质ID
			
 
				+            if "purpose_final_result" in post_data:
			
 
				+                purpose_data = post_data["purpose_final_result"]
			
 
				+                # 意图
			
 
				+                for item in purpose_data.get("最终意图列表", []):
			
 
				+                    item_id = item.get("意图ID", "")
			
 
				+                    item_name = item.get("目的点", "")
			
 
				+                    if item_id and item_name:
			
 
				+                        id_to_name[item_id] = item_name
			
 
				+                        id_to_type[item_id] = "意图"
			
 
				+                # 实质
			
 
				+                for item in purpose_data.get("最终实质列表", []):
			
 
				+                    item_id = item.get("实质ID", "")
			
 
				+                    item_name = item.get("目的点", "")
			
 
				+                    related_intent_id = item.get("关联意图ID", "")
			
 
				+                    if item_id and item_name:
			
 
				+                        id_to_name[item_id] = item_name
			
 
				+                        id_to_type[item_id] = "实质"
			
 
				+
			
 
				+                    # 提取关联意图边：实质 -> 意图
			
 
				+                    if item_id and related_intent_id and related_intent_id in id_to_name:
			
 
				+                        substance_name = item_name
			
 
				+                        intent_name = id_to_name[related_intent_id]
			
 
				+
			
 
				+                        # 构建节点ID（实质和意图都属于目的点维度）
			
 
				+                        substance_node_id = build_node_id("目的点", "标签", substance_name)
			
 
				+                        intent_node_id = build_node_id("目的点", "标签", intent_name)
			
 
				+
			
 
				+                        edge_key = (substance_node_id, intent_node_id)
			
 
				+                        if edge_key not in seen_intent_edges:
			
 
				+                            seen_intent_edges.add(edge_key)
			
 
				+                            intent_edges.append({
			
 
				+                                "源节点ID": substance_node_id,
			
 
				+                                "目标节点ID": intent_node_id,
			
 
				+                                "边类型": "关联意图",
			
 
				+                                "边详情": {
			
 
				+                                    "源类型": "实质",
			
 
				+                                    "目标类型": "意图",
			
 
				+                                    "帖子ID列表": [post_id]
			
 
				+                                }
			
 
				+                            })
			
 
				+                        else:
			
 
				+                            # 已存在的边，添加帖子ID
			
 
				+                            for edge in intent_edges:
			
 
				+                                if edge["源节点ID"] == substance_node_id and edge["目标节点ID"] == intent_node_id:
			
 
				+                                    if post_id not in edge["边详情"]["帖子ID列表"]:
			
 
				+                                        edge["边详情"]["帖子ID列表"].append(post_id)
			
 
				+                                    break
			
 
				+
			
 
				+            # 收集关键点ID并提取支撑边
			
 
				+            if "keypoint_final" in post_data:
			
 
				+                for item in post_data["keypoint_final"].get("最终关键点列表", []):
			
 
				+                    kp_id = item.get("关键点ID", "")
			
 
				+                    kp_name = item.get("关键点", "")
			
 
				+                    support_ids = item.get("支撑的ID", [])
			
 
				+
			
 
				+                    if not kp_name or not support_ids:
			
 
				+                        continue
			
 
				+
			
 
				+                    # 关键点节点ID
			
 
				+                    kp_node_id = build_node_id("关键点", "标签", kp_name)
			
 
				+
			
 
				+                    # 遍历支撑的ID
			
 
				+                    for support_id in support_ids:
			
 
				+                        if support_id not in id_to_name:
			
 
				+                            continue
			
 
				+
			
 
				+                        target_name = id_to_name[support_id]
			
 
				+                        target_type = id_to_type[support_id]
			
 
				+
			
 
				+                        # 确定目标节点的维度
			
 
				+                        if target_type == "灵感点":
			
 
				+                            target_dimension = "灵感点"
			
 
				+                        else:  # 意图或实质
			
 
				+                            target_dimension = "目的点"
			
 
				+
			
 
				+                        target_node_id = build_node_id(target_dimension, "标签", target_name)
			
 
				+
			
 
				+                        edge_key = (kp_node_id, target_node_id)
			
 
				+                        if edge_key not in seen_support_edges:
			
 
				+                            seen_support_edges.add(edge_key)
			
 
				+                            support_edges.append({
			
 
				+                                "源节点ID": kp_node_id,
			
 
				+                                "目标节点ID": target_node_id,
			
 
				+                                "边类型": "支撑",
			
 
				+                                "边详情": {
			
 
				+                                    "源类型": "关键点",
			
 
				+                                    "目标类型": target_type,
			
 
				+                                    "帖子ID列表": [post_id]
			
 
				+                                }
			
 
				+                            })
			
 
				+                        else:
			
 
				+                            # 已存在的边，添加帖子ID
			
 
				+                            for edge in support_edges:
			
 
				+                                if edge["源节点ID"] == kp_node_id and edge["目标节点ID"] == target_node_id:
			
 
				+                                    if post_id not in edge["边详情"]["帖子ID列表"]:
			
 
				+                                        edge["边详情"]["帖子ID列表"].append(post_id)
			
 
				+                                    break
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"  警告: 处理文件 {file_path.name} 时出错: {e}")
			
 
				+
			
 
				+    return support_edges, intent_edges
			
 
				+
			
 
				+
			
 
				 # ========== 分类-分类边提取 ==========
			
 
				 
			
 
				 def extract_category_edges_from_associations(associations_data: Dict) -> List[Dict]:
			
@@ -762,8 +923,15 @@ def main():
 
				 
			
 
				     # 输入文件路径
			
 
				     pattern_file = config.pattern_cluster_file
			
 
				-    associations_file = config.account_dir / "pattern相关文件/optimization/dimension_associations_analysis.json"
			
 
				-    intra_associations_file = config.account_dir / "pattern相关文件/optimization/intra_dimension_associations_analysis.json"
			
 
				+    # 尝试新路径，如果不存在则使用旧路径
			
 
				+    associations_file_new = config.account_dir / "pattern相关文件/detail/dimension_associations_analysis.json"
			
 
				+    associations_file_old = config.account_dir / "pattern相关文件/optimization/dimension_associations_analysis.json"
			
 
				+    associations_file = associations_file_new if associations_file_new.exists() else associations_file_old
			
 
				+
			
 
				+    intra_associations_file_new = config.account_dir / "pattern相关文件/detail/intra_dimension_associations_analysis.json"
			
 
				+    intra_associations_file_old = config.account_dir / "pattern相关文件/optimization/intra_dimension_associations_analysis.json"
			
 
				+    intra_associations_file = intra_associations_file_new if intra_associations_file_new.exists() else intra_associations_file_old
			
 
				+
			
 
				     current_posts_dir = config.current_posts_dir
			
 
				 
			
 
				     # 输出文件路径
			
@@ -906,11 +1074,22 @@ def main():
 
				     all_edges.extend(tag_cooccurrence_edges)
			
 
				     print(f"  标签-标签共现边: {len(tag_cooccurrence_edges)} 条")
			
 
				 
			
 
				+    # ===== 提取支撑边和关联意图边（新版数据结构）=====
			
 
				+    print("\n" + "="*60)
			
 
				+    print("提取支撑边和关联意图边（新版数据结构）...")
			
 
				+    support_edges, intent_edges = extract_support_and_intent_edges(historical_posts_dir, exclude_post_ids)
			
 
				+    all_edges.extend(support_edges)
			
 
				+    all_edges.extend(intent_edges)
			
 
				+    print(f"  支撑边: {len(support_edges)} 条")
			
 
				+    print(f"  关联意图边: {len(intent_edges)} 条")
			
 
				+
			
 
				     # 更新总计
			
 
				     print(f"\n总计: {len(all_edges)} 条边")
			
 
				     print(f"  分类共现（跨点）边: {len(category_edges)}")
			
 
				     print(f"  分类共现（点内）边: {len(intra_category_edges)}")
			
 
				     print(f"  标签共现边: {len(tag_cooccurrence_edges)}")
			
 
				+    print(f"  支撑边: {len(support_edges)}")
			
 
				+    print(f"  关联意图边: {len(intent_edges)}")
			
 
				     print(f"  属于边: {belong_count}")
			
 
				     print(f"  包含边: {contain_count}")