#!/bin/bash
# run_graph_pipeline.sh - 图谱构建与可视化流程(步骤5-10)
#
# 依赖前置步骤(1-4)已执行完成:
#   1. extract_feature_categories.py
#   2. extract_features_from_posts.py
#   3. extract_current_posts.py
#   4. match_inspiration_features.py
#
# 本脚本执行:
#   5. filter_how_results.py - 过滤how解构结果
#   6. extract_nodes_and_edges.py - 提取节点和边
#   7. build_persona_tree.py - 构建人设树
#   8. build_match_graph.py - 构建匹配图谱
#   9. build_post_tree.py - 构建帖子树
#   10. visualize_match_graph.py - 生成可视化HTML
#
# 使用方式:
#   ./run_graph_pipeline.sh                  # 使用默认账号
#   ./run_graph_pipeline.sh 阿里多多酱       # 指定账号
#   ACCOUNT_NAME=xxx ./run_graph_pipeline.sh # 通过环境变量指定账号
  # Abort on the first command that exits non-zero.
  set -e

  # Absolute path of the directory that contains this script.
  # CDPATH is cleared for the lookup: when `cd` resolves a directory through
  # CDPATH it prints that directory to stdout, which would end up inside the
  # captured value. `--` keeps paths that begin with `-` from being parsed as
  # options.
  SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

  # The project root is two directory levels above the script directory.
  PROJECT_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"

  # Work from the project root so the relative paths used later in this file
  # (script/data_processing/..., config/accounts.json) resolve.
  cd -- "$PROJECT_ROOT"
  # ANSI colour sequences used by the print_* helpers.
  # Stored as real escape bytes ($'...') so they can be emitted with
  # printf '%s' without any further escape processing.
  GREEN=$'\033[0;32m'
  RED=$'\033[0;31m'
  YELLOW=$'\033[1;33m'
  BLUE=$'\033[0;34m'
  NC=$'\033[0m' # reset to the terminal default

  # Logging helpers. Each prints one line: a coloured tag followed by the
  # message. The message is passed through '%s', so backslash sequences inside
  # it (for example in a path or an account name) are printed literally
  # instead of being interpreted.
  #
  # print_info    $1 - message
  # print_success $1 - message
  # print_error   $1 - message (written to stderr)
  # print_step    $1 - tag text shown in brackets (e.g. "2/6"), $2 - message
  print_info() { printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "${1-}"; }
  print_success() { printf '%s[SUCCESS]%s %s\n' "$GREEN" "$NC" "${1-}"; }
  # Errors go to stderr so they are not mixed into captured stdout.
  print_error() { printf '%s[ERROR]%s %s\n' "$RED" "$NC" "${1-}" >&2; }
  print_step() { printf '%s[%s]%s %s\n' "$YELLOW" "${1-}" "$NC" "${2-}"; }
  # Run one pipeline stage and report its outcome.
  #
  # Arguments:
  #   $1 - position of the stage, shown as "<n>/6" (the total is fixed at 6)
  #   $2 - human-readable stage name used in the progress messages
  #   $3 - file name of the Python script under script/data_processing/
  # Returns:
  #   0 when the Python script exits successfully, 1 otherwise.
  run_step() {
    local idx=$1 label=$2 script_file=$3

    print_step "$idx/6" "$label"

    # Failure path first: report and bail out.
    if ! python "script/data_processing/$script_file"; then
      print_error "$label 失败"
      return 1
    fi

    print_success "$label 完成"
    echo ""
    return 0
  }
  # Run all six graph-building / visualisation stages for one account.
  #
  # Arguments:
  #   $1 - account name; exported as ACCOUNT_NAME before the stages run so that
  #        child processes inherit it (presumably read by the stage scripts).
  # Globals:
  #   PROJECT_ROOT (read, shown in the banner), ACCOUNT_NAME (written, exported)
  # Returns:
  #   0 when every stage succeeds; 1 as soon as one stage fails, in which case
  #   the remaining stages are not run.
  process_account() {
    local account_name=$1

    # Stage table in execution order: "<display name>|<script file>".
    local -a stages=(
      "过滤how解构结果|filter_how_results.py"
      "提取节点和边|extract_nodes_and_edges.py"
      "构建人设树|build_persona_tree.py"
      "构建匹配图谱|build_match_graph.py"
      "构建帖子树|build_post_tree.py"
      "生成可视化HTML|visualize_match_graph.py"
    )
    local stage
    local position=0

    # Banner: one output line per argument.
    printf '%s\n' \
      "" \
      "==========================================" \
      "图谱构建与可视化流程" \
      "账号: $account_name" \
      "项目: $PROJECT_ROOT" \
      "==========================================" \
      ""

    export ACCOUNT_NAME="$account_name"

    for stage in "${stages[@]}"; do
      position=$((position + 1))
      # Split the table entry at the "|" into display name and script file.
      run_step "$position" "${stage%%|*}" "${stage#*|}" || return 1
    done

    echo "=========================================="
    print_success "图谱构建与可视化流程完成!"
    echo "=========================================="
  }
  # Print the default account name configured in config/accounts.json
  # (path relative to the current working directory).
  #
  # Outputs:
  #   One line on stdout: the value of the top-level "default_account" key.
  #   The line is empty when the file does not exist, when the top-level JSON
  #   value is not an object, or when the key is absent, null or empty.
  # Returns:
  #   The exit status of the python process (non-zero, for example, when the
  #   file exists but is not valid JSON).
  get_default_account() {
    python -c '
import json
from pathlib import Path

config_file = Path("config/accounts.json")
default_account = ""
if config_file.is_file():
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(config_file, encoding="utf-8") as f:
        config = json.load(f)
    if isinstance(config, dict):
        # "or" maps a missing key or JSON null to "" instead of printing None.
        default_account = config.get("default_account") or ""
print(default_account)
'
  }
  # Entry point: decide which account to process, then run the pipeline for it.
  #
  # The account name is taken from the first source that yields a non-empty
  # value, in this order:
  #   1. the first positional argument
  #   2. the ACCOUNT_NAME environment variable
  #   3. the default reported by get_default_account
  #
  # Arguments:
  #   $1 - account name (optional)
  # Returns:
  #   The status of process_account. Exits the script with status 1 when no
  #   account name can be determined.
  main() {
    # ${…:-} keeps both lookups safe if the script is ever run with `set -u`.
    local account_name="${1:-}"

    if [[ -z "$account_name" ]]; then
      account_name="${ACCOUNT_NAME:-}"
    fi

    if [[ -z "$account_name" ]]; then
      # A failing lookup is treated as "no default configured". Without the
      # `||` fallback the `set -e` at the top of the script would abort right
      # here and the explanatory message below would never be shown.
      account_name=$(get_default_account) || account_name=""
      if [[ -z "$account_name" ]]; then
        print_error "未指定账号,请通过参数或环境变量指定"
        echo "用法: $0 <账号名>" >&2
        exit 1
      fi
      print_info "使用默认账号: $account_name"
    fi

    process_account "$account_name"
  }
  110. main "$@"