#!/bin/bash
# run_graph_pipeline.sh
# Graph construction and visualization pipeline (steps 5-11).
#
# Prerequisite steps (1-4) must already have been run:
#   1. extract_feature_categories.py
#   2. extract_features_from_posts.py
#   3. extract_current_posts.py
#   4. match_inspiration_features.py
#
# This script runs:
#   5. filter_how_results.py       - filter the "how" deconstruction results
#   6. extract_nodes_and_edges.py  - extract nodes and edges
#   7. build_persona_graph.py      - build the persona graph
#   8. build_match_graph.py        - build the match graph
#   9. build_post_graph.py         - build the post graph
#  10. visualize_match_graph.py    - generate the match-graph visualization HTML
#  11. visualization/build.py      - generate the persona-graph visualization HTML
#
# Usage:
#   ./run_graph_pipeline.sh                  # use the default account
#   ./run_graph_pipeline.sh 阿里多多酱         # use the given account
#   ACCOUNT_NAME=xxx ./run_graph_pipeline.sh
  # Abort as soon as a command fails outside of a conditional context.
  set -e

  # Absolute path of the directory holding this script, and the project root
  # two levels above it. CDPATH is cleared for the lookup because a CDPATH hit
  # makes `cd` print the directory, which would end up in the captured value.
  SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
  PROJECT_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"
  readonly SCRIPT_DIR PROJECT_ROOT

  # All relative paths used later in the script are resolved from here.
  cd -- "$PROJECT_ROOT"

  # ANSI color sequences, kept as literal backslash escapes; they are turned
  # into real escape characters by whatever prints them.
  readonly GREEN='\033[0;32m'
  readonly RED='\033[0;31m'
  readonly YELLOW='\033[1;33m'
  readonly BLUE='\033[0;34m'
  readonly NC='\033[0m'
  # Logging helpers. Each prints a colored "[TAG]" prefix followed by the
  # message. Only the color variables are expanded with %b; the message is
  # printed with %s so backslashes in it are shown verbatim.
  #   print_info MSG / print_success MSG / print_error MSG
  #   print_step LABEL MSG   - LABEL is placed inside the brackets
  # print_error writes to stderr; the others write to stdout.
  print_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
  print_success() { printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"; }
  print_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2; }
  print_step() { printf '%b[%s]%b %s\n' "$YELLOW" "$1" "$NC" "$2"; }
  #######################################
  # Run one pipeline script with python and report the outcome.
  # Arguments:
  #   $1 - step number shown in the progress label
  #   $2 - step name used in the log messages
  #   $3 - script file name inside the script directory
  #   $4 - (optional) directory containing the script, relative to the
  #        current working directory; defaults to script/data_processing
  #   $5 - (optional) total step count shown in the label; defaults to 7
  # Returns:
  #   0 if the script exited successfully, 1 otherwise.
  #######################################
  run_step() {
    local step_num=$1
    local step_name=$2
    local script_name=$3
    local script_dir=${4:-script/data_processing}
    local total_steps=${5:-7}

    print_step "$step_num/$total_steps" "$step_name"
    if python "$script_dir/$script_name"; then
      print_success "$step_name 完成"
      echo ""
      return 0
    fi

    print_error "$step_name 失败"
    return 1
  }
  #######################################
  # Run the full graph build and visualization sequence for one account.
  # Prints a banner, exports ACCOUNT_NAME, runs the six data-processing
  # scripts through run_step, then runs script/visualization/build.py.
  # Globals:   PROJECT_ROOT (read), ACCOUNT_NAME (exported)
  # Arguments: $1 - account name
  # Returns:   1 as soon as any step fails; otherwise the status of the
  #            final banner output.
  #######################################
  process_account() {
    local account_name=$1
    local rule="=========================================="

    printf '%s\n' \
      "" \
      "$rule" \
      "图谱构建与可视化流程" \
      "账号: $account_name" \
      "项目: $PROJECT_ROOT" \
      "$rule" \
      ""

    # Exported so that the child python processes inherit it.
    export ACCOUNT_NAME="$account_name"

    # Pipeline steps 5-10, shown as 1-6 of 7: "display name|script file".
    local -a steps=(
      "过滤how解构结果|filter_how_results.py"
      "提取节点和边|extract_nodes_and_edges.py"
      "构建人设图谱|build_persona_graph.py"
      "构建匹配图谱|build_match_graph.py"
      "构建帖子图谱|build_post_graph.py"
      "生成匹配图谱可视化|visualize_match_graph.py"
    )
    local entry
    local position=0
    for entry in "${steps[@]}"; do
      position=$((position + 1))
      run_step "$position" "${entry%%|*}" "${entry#*|}" || return 1
    done

    # Pipeline step 11 (7 of 7) lives under script/visualization, so it is
    # launched directly here.
    print_step "7/7" "生成人设图谱可视化"
    if ! python "script/visualization/build.py"; then
      print_error "生成人设图谱可视化 失败"
      return 1
    fi
    print_success "生成人设图谱可视化 完成"
    echo ""

    echo "$rule"
    print_success "图谱构建与可视化流程完成!"
    echo "$rule"
  }
  #######################################
  # Print the default account name stored in config/accounts.json.
  # The file is read relative to the current working directory and decoded
  # as UTF-8; the value of its "default_account" key is printed, or an empty
  # line when the key is absent.
  # Outputs: the account name on stdout
  # Returns: python's exit status (non-zero if the file is missing or is not
  #          valid JSON)
  #######################################
  get_default_account() {
    # The python source is a single-quoted shell string, so it must start at
    # column 0 regardless of the surrounding shell indentation.
    python -c '
import json
from pathlib import Path

config_file = Path("config/accounts.json")
with open(config_file, encoding="utf-8") as f:
    config = json.load(f)
print(config.get("default_account", ""))
'
  }
  #######################################
  # Entry point: choose the account and run the pipeline for it.
  # The account name is taken from, in order of precedence:
  #   1. the first positional argument,
  #   2. the ACCOUNT_NAME environment variable,
  #   3. the value printed by get_default_account.
  # Arguments: $1 - (optional) account name
  # Returns:   the status of process_account; exits with status 1 after
  #            printing a usage hint when no account name can be determined.
  #######################################
  main() {
    local account_name=""

    if [[ -n "${1:-}" ]]; then
      account_name=$1
    elif [[ -n "${ACCOUNT_NAME:-}" ]]; then
      account_name=$ACCOUNT_NAME
    else
      # A failed lookup is treated like "no default configured" so that the
      # usage hint below is still shown instead of `set -e` ending the
      # script right after the lookup's own error output.
      account_name=$(get_default_account) || account_name=""
      if [[ -z "$account_name" ]]; then
        print_error "未指定账号,请通过参数或环境变量指定"
        echo "用法: $0 <账号名>" >&2
        exit 1
      fi
      print_info "使用默认账号: $account_name"
    fi

    process_account "$account_name"
  }
  # Run the pipeline, forwarding every command-line argument unchanged.
  main "$@"