| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
#!/bin/bash
# Data-processing pipeline runner (supports multiple configured accounts).
# Usage:
#   1. Process a single account:      ./run_all_with_config.sh 阿里多多酱
#   2. Process every enabled account: ./run_all_with_config.sh --all
#   3. Select the account via env:    ACCOUNT_NAME=阿里多多酱 ./run_all_with_config.sh

set -e # Abort on the first unhandled command failure.

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Project root: two directory levels above the script directory.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
readonly SCRIPT_DIR PROJECT_ROOT

# Work from the project root so the relative paths used below resolve.
cd "$PROJECT_ROOT"

# ANSI color codes, kept as literal backslash sequences; they are turned into
# real escape characters only when a message is printed.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No color (reset).
readonly GREEN RED YELLOW BLUE NC
# Colored message helpers.

#######################################
# Print an informational message with a blue "[INFO]" tag.
# Globals:   BLUE, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
print_info() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself untouched.
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}"
}
#######################################
# Print a success message with a green "[SUCCESS]" tag.
# Globals:   GREEN, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
print_success() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself untouched.
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "${1-}"
}
#######################################
# Print an error message with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
print_error() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself untouched.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}"
}
#######################################
# Print a warning message with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
print_warning() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself untouched.
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "${1-}"
}
#######################################
# Run the full data-processing pipeline for one account.
#
# Exports ACCOUNT_NAME, runs the path-configuration check, then runs the five
# processing scripts in order, stopping at the first one that fails.
#
# Globals:   ACCOUNT_NAME (written and exported)
# Arguments: $1 - account name
# Outputs:   progress, success and error messages on stdout
# Returns:   0 when every step succeeds, 1 as soon as any step fails
#######################################
process_account() {
  local account_name=$1
  local -r scripts_dir="script/data_processing"
  # One row per step: script file | progress text | label used for the
  # "完成" / "失败" result messages.
  local -r -a steps=(
    "extract_feature_categories.py|正在提取特征分类映射|特征分类映射提取"
    "extract_features_from_posts.py|正在提取特征帖子来源|特征帖子来源提取"
    "extract_current_posts.py|正在提取当前帖子解构任务列表|当前帖子解构任务列表提取"
    "match_inspiration_features.py|正在匹配灵感特征|灵感特征匹配"
    "visualize_how_results.py|正在生成可视化结果|可视化结果生成"
  )
  local -r total=${#steps[@]}
  local index=0
  local entry script progress label

  echo ""
  echo "=========================================="
  echo "处理账号: $account_name"
  echo "=========================================="

  # The step scripts are invoked without arguments, so the account is handed
  # to them through the environment.
  export ACCOUNT_NAME="$account_name"

  # Each python call is tested directly in an `if`: a bare command followed
  # by a `$?` check never reaches the check under `set -e`, so the error
  # message would be lost when this function is called outside a condition.
  print_info "检查路径配置..."
  if ! python "${scripts_dir}/path_config.py" "$account_name"; then
    print_error "路径配置检查失败,跳过账号: $account_name"
    return 1
  fi

  echo ""
  print_info "开始处理数据..."
  echo ""

  for entry in "${steps[@]}"; do
    index=$((index + 1))
    IFS='|' read -r script progress label <<<"$entry"

    print_info "[${index}/${total}] ${progress}..."
    if python "${scripts_dir}/${script}"; then
      print_success "${label}完成"
    else
      print_error "${label}失败"
      return 1
    fi
    echo ""
  done

  print_success "账号 $account_name 处理完成!"
  return 0
}
#######################################
# List the names of all enabled accounts.
#
# Reads config/accounts.json relative to the current directory. An entry of
# the "accounts" array counts as enabled unless its "enabled" field is false.
#
# Arguments: none
# Outputs:   the enabled account names on one line of stdout, separated by
#            single spaces (empty line when there are none)
# Returns:   the exit status of the python interpreter
#######################################
get_enabled_accounts() {
  # The file is opened explicitly as UTF-8 so that non-ASCII account names
  # are read the same way regardless of the system locale.
  python -c '
import json

with open("config/accounts.json", encoding="utf-8") as f:
    config = json.load(f)
names = [acc["name"] for acc in config.get("accounts", []) if acc.get("enabled", True)]
print(" ".join(names))
'
}
#######################################
# Print the configured default account name.
#
# Reads the "default_account" field of config/accounts.json relative to the
# current directory.
#
# Arguments: none
# Outputs:   the default account name on stdout; an empty line when the
#            field is missing, empty or null
# Returns:   the exit status of the python interpreter
#######################################
get_default_account() {
  # `or ""` maps a JSON null to an empty line instead of the literal text
  # "None", which a caller would otherwise mistake for an account name.
  # The file is opened explicitly as UTF-8 so that non-ASCII names are read
  # the same way regardless of the system locale.
  python -c '
import json

with open("config/accounts.json", encoding="utf-8") as f:
    config = json.load(f)
print(config.get("default_account") or "")
'
}
#######################################
# Entry point: decide which account(s) to process and run the pipeline.
#
# Selection order:
#   1. "--all" as $1       -> every account from get_enabled_accounts
#   2. any other $1        -> that single account
#   3. ACCOUNT_NAME is set -> that account
#   4. otherwise           -> the account from get_default_account, or, if
#                             there is none, print usage and exit 1
#
# Globals:   PROJECT_ROOT (read), ACCOUNT_NAME (read)
# Arguments: $1 - optional: "--all" or an account name
# Outputs:   progress and summary messages on stdout
# Returns:   exits 1 when any processed account fails or when only usage is
#            shown; otherwise returns/exits 0
#######################################
main() {
  local accounts account default_account
  local success_count=0
  local fail_count=0

  echo "=========================================="
  echo "数据处理流程"
  echo "项目根目录: $PROJECT_ROOT"
  echo "=========================================="

  if [[ "${1:-}" == "--all" ]]; then
    # Batch mode: process every enabled account and tally the results.
    print_info "处理所有启用的账号..."
    accounts=$(get_enabled_accounts)
    if [[ -z "$accounts" ]]; then
      print_error "没有找到启用的账号"
      exit 1
    fi
    print_info "找到启用的账号: $accounts"

    # Word splitting is intentional: get_enabled_accounts emits one
    # space-separated list.
    for account in $accounts; do
      # Counters use assignment arithmetic: `((n++))` returns status 1 when
      # n is 0, which would abort the whole run under `set -e`.
      if process_account "$account"; then
        success_count=$((success_count + 1))
      else
        fail_count=$((fail_count + 1))
      fi
    done

    echo ""
    echo "=========================================="
    print_info "批量处理完成"
    print_success "成功: $success_count 个账号"
    if ((fail_count > 0)); then
      print_error "失败: $fail_count 个账号"
    fi
    echo "=========================================="
    if ((fail_count > 0)); then
      exit 1
    fi
  elif [[ -n "${1:-}" ]]; then
    # Explicit account name on the command line.
    process_account "$1" || exit 1
  elif [[ -n "${ACCOUNT_NAME:-}" ]]; then
    # Account chosen through the environment.
    print_info "使用环境变量 ACCOUNT_NAME=$ACCOUNT_NAME"
    process_account "$ACCOUNT_NAME" || exit 1
  else
    # Fall back to the configured default account. A failure to read the
    # configuration is tolerated here so that usage can still be shown.
    default_account=$(get_default_account) || default_account=""
    if [[ -n "$default_account" ]]; then
      print_info "使用默认账号: $default_account"
      process_account "$default_account" || exit 1
      exit 0
    fi

    # Nothing selected: show usage.
    echo "用法:"
    echo " 1. 处理单个账号:"
    echo " $0 <账号名>"
    echo " 例如: $0 阿里多多酱"
    echo ""
    echo " 2. 处理所有启用的账号:"
    echo " $0 --all"
    echo ""
    echo " 3. 使用环境变量:"
    echo " ACCOUNT_NAME=阿里多多酱 $0"
    echo ""

    # List the accounts that can be chosen, when the config is readable.
    accounts=$(get_enabled_accounts) || accounts=""
    if [[ -n "$accounts" ]]; then
      echo "当前启用的账号:"
      for account in $accounts; do
        echo " - $account"
      done
    fi
    exit 1
  fi
}
# Script entry point: forward all command-line arguments to main unchanged.
main "$@"
|