ソースを参照

feat: 添加帖子树构建步骤,并将tab标题改为"日期+标题"格式、按发布日期降序排列

- run_graph_pipeline.sh: 添加 build_post_tree.py 作为步骤9,原步骤9(visualize_match_graph.py)顺延为步骤10
- visualize_match_graph.py: tab标题改为"日期+标题"格式,按发布日期降序排列

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 4 日 前
コミット
dbfbedca02

+ 8 - 4
script/data_processing/run_graph_pipeline.sh

@@ -12,7 +12,8 @@
 #   6. extract_nodes_and_edges.py - 提取节点和边
 #   7. build_persona_tree.py      - 构建人设树
 #   8. build_match_graph.py       - 构建匹配图谱
-#   9. visualize_match_graph.py   - 生成可视化HTML
+#   9. build_post_tree.py         - 构建帖子树
+#  10. visualize_match_graph.py   - 生成可视化HTML
 #
 # 使用方式:
 #   ./run_graph_pipeline.sh              # 使用默认账号
@@ -45,7 +46,7 @@ run_step() {
     local step_name=$2
     local script_name=$3
 
-    print_step "$step_num/5" "$step_name"
+    print_step "$step_num/6" "$step_name"
 
     if python "script/data_processing/$script_name"; then
         print_success "$step_name 完成"
@@ -84,8 +85,11 @@ process_account() {
     # 步骤8: 构建匹配图谱
     run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
 
-    # 步骤9: 生成可视化HTML
-    run_step 5 "生成可视化HTML" "visualize_match_graph.py" || return 1
+    # 步骤9: 构建帖子树
+    run_step 5 "构建帖子树" "build_post_tree.py" || return 1
+
+    # 步骤10: 生成可视化HTML
+    run_step 6 "生成可视化HTML" "visualize_match_graph.py" || return 1
 
     echo "=========================================="
     print_success "图谱构建与可视化流程完成!"

+ 26 - 4
script/data_processing/visualize_match_graph.py

@@ -5362,17 +5362,39 @@ def generate_combined_html(all_graph_data: List[Dict], persona_tree_data: Dict,
         persona_tree_data: 完整的人设树数据(节点和边)
         output_file: 输出文件路径
     """
+    # 按发布日期降序排序
+    def get_publish_date(data):
+        post_detail = data.get("postDetail", {})
+        publish_time = post_detail.get("publish_time", "")
+        # 提取日期部分(假设格式为 "YYYY-MM-DD" 或 "YYYY-MM-DD HH:MM:SS")
+        if publish_time:
+            return publish_time[:10]
+        return "0000-00-00"
+
+    all_graph_data.sort(key=get_publish_date, reverse=True)
+
     # 生成帖子选项HTML
     tabs_html = ""
     for i, data in enumerate(all_graph_data):
         post_title = data.get("postTitle", "")
-        # 使用帖子标题,如果太长则截断
+        post_detail = data.get("postDetail", {})
+        publish_time = post_detail.get("publish_time", "")
+        # 提取日期部分
+        date_str = publish_time[:10] if publish_time else ""
+
+        # 使用"日期+标题"格式
         if post_title:
-            option_name = post_title[:30] + "..." if len(post_title) > 30 else post_title
+            title_part = post_title[:25] + "..." if len(post_title) > 25 else post_title
         else:
-            option_name = f"帖子 {i+1}"
+            title_part = f"帖子 {i+1}"
+
+        if date_str:
+            option_name = f"{date_str} {title_part}"
+        else:
+            option_name = title_part
+
         selected = "selected" if i == 0 else ""
-        tabs_html += f'<option value="{i}" {selected}>{i+1}. {option_name}</option>\n'
+        tabs_html += f'<option value="{i}" {selected}>{option_name}</option>\n'
 
     # 生成HTML
     html_content = HTML_TEMPLATE.format(