run_graph_pipeline.sh 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. #!/bin/bash
  2. # 图谱构建与可视化流程(步骤5-9)
  3. #
  4. # 依赖前置步骤(1-4)已执行完成:
  5. # 1. extract_feature_categories.py
  6. # 2. extract_features_from_posts.py
  7. # 3. extract_current_posts.py
  8. # 4. match_inspiration_features.py
  9. #
  10. # 本脚本执行:
  11. # 5. filter_how_results.py - 过滤how解构结果
  12. # 6. extract_nodes_and_edges.py - 提取节点和边
  13. # 7. build_persona_tree.py - 构建人设树
  14. # 8. build_match_graph.py - 构建匹配图谱
  15. # 9. visualize_match_graph.py - 生成可视化HTML
  16. #
  17. # 使用方式:
  18. # ./run_graph_pipeline.sh # 使用默认账号
  19. # ./run_graph_pipeline.sh 阿里多多酱 # 指定账号
  20. # ACCOUNT_NAME=xxx ./run_graph_pipeline.sh
  # Abort on the first command that exits with a non-zero status.
  set -e

  # Absolute path of the directory containing this script.
  # CDPATH is cleared for the cd: with CDPATH set, a relative script path could
  # be resolved through it and cd would print the directory it picked, which
  # would corrupt the captured value. "--" guards paths beginning with a dash.
  SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

  # The project root is two levels above the script directory. The script then
  # switches into it, so relative paths used afterwards resolve from there.
  PROJECT_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"
  cd -- "$PROJECT_ROOT"
  # ANSI color escape sequences. They are stored as literal backslash
  # sequences and expanded only at print time (printf's %b).
  GREEN='\033[0;32m'
  RED='\033[0;31m'
  YELLOW='\033[1;33m'
  BLUE='\033[0;34m'
  NC='\033[0m' # reset back to the terminal's default color

  # Logging helpers: each prints one line with a colored tag followed by the
  # message. The message is printed with %s, so backslashes it may contain
  # (for example in an account name) are shown verbatim and never interpreted
  # as escape sequences.
  #   print_info    $1 - message                      -> stdout
  #   print_success $1 - message                      -> stdout
  #   print_error   $1 - message                      -> stderr (diagnostic)
  #   print_step    $1 - label (e.g. "2/5"), $2 - message -> stdout
  print_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
  print_success() { printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"; }
  print_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2; }
  print_step() { printf '%b[%s]%b %s\n' "$YELLOW" "$1" "$NC" "$2"; }
  #######################################
  # Run a single pipeline step and report its outcome.
  # Arguments:
  #   $1 - step number shown in the progress label
  #   $2 - human-readable step name
  #   $3 - file name of the Python script under script/data_processing/
  #   $4 - (optional) total number of steps shown in the label; defaults to 5
  # Outputs:
  #   A progress label, then a success or failure message, through the
  #   print_step / print_success / print_error helpers.
  # Returns:
  #   0 if the Python script exits successfully, 1 otherwise.
  #######################################
  run_step() {
    local step_num=$1
    local step_name=$2
    local script_name=$3
    local total_steps=${4:-5}

    print_step "$step_num/$total_steps" "$step_name"

    # The script path is relative, so it is resolved against the current
    # working directory at call time.
    if ! python "script/data_processing/$script_name"; then
      print_error "$step_name 失败"
      return 1
    fi

    print_success "$step_name 完成"
    echo ""
    return 0
  }
  #######################################
  # Run the five graph-building and visualization steps for one account.
  # Globals:
  #   PROJECT_ROOT (read, shown in the banner)
  #   ACCOUNT_NAME (written and exported for the step scripts)
  # Arguments:
  #   $1 - account name
  # Outputs:
  #   A banner, the output of every step, and a completion message.
  # Returns:
  #   1 as soon as a step fails (later steps are not run); otherwise the
  #   status of the final echo.
  #######################################
  process_account() {
    local account_name=$1
    local rule="=========================================="

    printf '%s\n' \
      "" \
      "$rule" \
      "图谱构建与可视化流程" \
      "账号: $account_name" \
      "项目: $PROJECT_ROOT" \
      "$rule" \
      ""

    # The step scripts receive the account through the environment.
    export ACCOUNT_NAME="$account_name"

    # Ordered step table; each entry is "<display name>|<script file>".
    local -a steps=(
      "过滤how解构结果|filter_how_results.py"
      "提取节点和边|extract_nodes_and_edges.py"
      "构建人设树|build_persona_tree.py"
      "构建匹配图谱|build_match_graph.py"
      "生成可视化HTML|visualize_match_graph.py"
    )

    local position=0
    local entry
    for entry in "${steps[@]}"; do
      position=$((position + 1))
      # Split the entry at its single "|" into display name and script file.
      run_step "$position" "${entry%%|*}" "${entry#*|}" || return 1
    done

    echo "$rule"
    print_success "图谱构建与可视化流程完成!"
    echo "$rule"
  }
  #######################################
  # Print the default account name from config/accounts.json.
  # The path is relative, so it is resolved against the current working
  # directory at call time.
  # Outputs:
  #   The value of the "default_account" key, or an empty line when the key
  #   is absent.
  # Returns:
  #   The exit status of python: non-zero when the file cannot be opened or
  #   does not contain valid JSON (Python prints a traceback to stderr).
  #######################################
  get_default_account() {
    # The file is opened explicitly as UTF-8 so that non-ASCII account names
    # decode the same way regardless of the locale's default encoding.
    python -c "
import json
from pathlib import Path

config_file = Path('config/accounts.json')
with open(config_file, encoding='utf-8') as f:
    config = json.load(f)
print(config.get('default_account', ''))
"
  }
  #######################################
  # Entry point: choose the account and run the pipeline for it.
  # The account is taken, in order of precedence, from:
  #   1. the first positional argument,
  #   2. the ACCOUNT_NAME environment variable,
  #   3. the default reported by get_default_account.
  # Globals:
  #   ACCOUNT_NAME (read)
  # Arguments:
  #   $1 - (optional) account name
  # Returns:
  #   The status of process_account; exits the script with status 1 when no
  #   account can be determined.
  #######################################
  main() {
    local account_name=""

    # ${var:-} keeps both lookups safe when the value is unset.
    if [[ -n "${1:-}" ]]; then
      account_name="$1"
    elif [[ -n "${ACCOUNT_NAME:-}" ]]; then
      account_name="$ACCOUNT_NAME"
    else
      # With `set -e` active, a failing command substitution in a plain
      # assignment would terminate the script right here, before the message
      # below could be shown. Treat a failed lookup as "no default account".
      account_name=$(get_default_account) || account_name=""
      if [[ -z "$account_name" ]]; then
        print_error "未指定账号,请通过参数或环境变量指定"
        echo "用法: $0 <账号名>"
        exit 1
      fi
      print_info "使用默认账号: $account_name"
    fi

    process_account "$account_name"
  }
  107. main "$@"