#!/bin/bash
#
# Launcher / supervisor script for the multi-threaded scheduler.
#
# Usage:
#   ./start_scheduler.sh start          # start the scheduler
#   ./start_scheduler.sh stop           # stop the monitor and the scheduler
#   ./start_scheduler.sh status         # show the current state
#   ./start_scheduler.sh restart        # restart the scheduler
#   ./start_scheduler.sh monitor        # monitor mode (automatic restart)
#   ./start_scheduler.sh stop-monitor   # stop monitor mode only
#   ./start_scheduler.sh cache-status   # python3 cache_manager.py status
#   ./start_scheduler.sh cache-clean    # python3 cache_manager.py clean
#   ./start_scheduler.sh cache-cleanup  # python3 cache_manager.py cleanup
#
# With no argument the script behaves like "start".
# All paths below are relative to the current working directory.

SCRIPT_NAME="multi_thread_scheduler.py"
PID_FILE="scheduler.pid"
LOG_DIR="logs"
MONITOR_PID_FILE="monitor.pid"
MAX_MEMORY_MB=2048 # memory ceiling (MB) enforced by monitor mode
RESTART_COUNT_FILE="restart_count.txt"
MAX_RESTARTS=10 # maximum number of automatic restarts in monitor mode

# Check the Python environment.
if ! command -v python3 &> /dev/null; then
  echo "错误: 未找到Python3,请先安装Python3"
  exit 1
fi

# Check the files the scheduler depends on.
if [ ! -f "indentify.py" ]; then
  echo "错误: 未找到indentify.py文件"
  exit 1
fi

if [ ! -f "$SCRIPT_NAME" ]; then
  echo "错误: 未找到${SCRIPT_NAME}文件"
  exit 1
fi

# Create the log directory.
mkdir -p "$LOG_DIR"

# Print the scheduler PID stored in PID_FILE, or an empty line when the
# file does not exist.
get_pid() {
  if [ -f "$PID_FILE" ]; then
    cat "$PID_FILE"
  else
    echo ""
  fi
}

# Print the monitor PID stored in MONITOR_PID_FILE, or an empty line when
# the file does not exist.
get_monitor_pid() {
  if [ -f "$MONITOR_PID_FILE" ]; then
    cat "$MONITOR_PID_FILE"
  else
    echo ""
  fi
}

# Return 0 when the PID recorded in PID_FILE belongs to a live process,
# 1 otherwise. A PID file that points at a dead process is removed.
is_running() {
  local pid
  pid=$(get_pid)
  if [ -n "$pid" ]; then
    if ps -p "$pid" > /dev/null 2>&1; then
      return 0
    fi
    # The process no longer exists: drop the stale PID file.
    rm -f "$PID_FILE"
    return 1
  fi
  return 1
}

# Print the scheduler's memory usage in whole MB (the "rss" column of ps,
# divided by 1024). Prints 0 when the process is not running or ps
# returns nothing.
check_memory() {
  local pid memory_kb
  pid=$(get_pid)
  if [ -n "$pid" ] && ps -p "$pid" > /dev/null 2>&1; then
    memory_kb=$(ps -p "$pid" -o rss= 2> /dev/null | tr -d ' ')
    if [ -n "$memory_kb" ]; then
      echo "$((memory_kb / 1024))"
    else
      echo "0"
    fi
  else
    echo "0"
  fi
}

# Increment the restart counter kept in RESTART_COUNT_FILE and print the
# new value.
record_restart() {
  local count=0
  if [ -f "$RESTART_COUNT_FILE" ]; then
    count=$(cat "$RESTART_COUNT_FILE")
  fi
  # An empty or corrupted counter file must not break the arithmetic below.
  if [[ ! "$count" =~ ^[0-9]+$ ]]; then
    count=0
  fi
  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  count=$((10#$count + 1))
  echo "$count" > "$RESTART_COUNT_FILE"
  echo "$count"
}

# Reset the restart counter to 0.
reset_restart_count() {
  echo "0" > "$RESTART_COUNT_FILE"
}

# Start the scheduler in the background and record its PID in PID_FILE.
# Returns 1 when the scheduler is already running or when the new process
# is gone 3 seconds after launch.
start_scheduler() {
  if is_running; then
    echo "调度器已经在运行中 (PID: $(get_pid))"
    return 1
  fi

  echo "正在启动多线程调度器..."
  echo "日志文件将保存在 $LOG_DIR/ 目录中"
  echo "最大内存使用量: ${MAX_MEMORY_MB}MB"

  # Environment variables intended to make Python's memory management
  # more stable.
  export PYTHONMALLOC=malloc
  export PYTHONDEVMODE=1
  export PYTHONUNBUFFERED=1

  # Run the Python script in the background; stdout and stderr go to one log.
  nohup python3 -u "$SCRIPT_NAME" > "$LOG_DIR/scheduler_stdout.log" 2>&1 &
  local pid=$!

  # Save the PID to the PID file.
  echo "$pid" > "$PID_FILE"

  echo "调度器已启动 (PID: $pid)"
  echo "使用以下命令查看状态:"
  echo "  ./start_scheduler.sh status"
  echo "  tail -f $LOG_DIR/scheduler_*.log"
  echo ""
  echo "使用以下命令停止:"
  echo "  ./start_scheduler.sh stop"

  # Wait a few seconds and verify that the process survived start-up.
  sleep 3
  if ! ps -p "$pid" > /dev/null 2>&1; then
    echo "警告: 进程启动后立即退出,请检查日志文件"
    rm -f "$PID_FILE"
    return 1
  fi
}

# Stop the scheduler: send SIGTERM, wait up to 10 seconds, then SIGKILL if
# the process is still alive. Returns 1 when no PID is recorded.
stop_scheduler() {
  local pid
  pid=$(get_pid)

  if [ -z "$pid" ]; then
    echo "调度器未运行"
    return 1
  fi

  echo "正在停止调度器 (PID: $pid)..."

  # Ask the process to terminate.
  kill -TERM "$pid" 2> /dev/null

  # Wait for the process to exit.
  local count=0
  while [ "$count" -lt 10 ] && ps -p "$pid" > /dev/null 2>&1; do
    sleep 1
    count=$((count + 1))
    echo "等待进程结束... ($count/10)"
  done

  # Still running after the grace period: kill it forcibly.
  if ps -p "$pid" > /dev/null 2>&1; then
    echo "强制停止进程..."
    kill -KILL "$pid" 2> /dev/null
  fi

  # Remove the PID file.
  rm -f "$PID_FILE"
  echo "调度器已停止"
}

# Stop the monitor process, if a PID is recorded: SIGTERM, wait up to
# 5 seconds, then SIGKILL. Does nothing when no monitor PID is recorded.
stop_monitor() {
  local monitor_pid
  monitor_pid=$(get_monitor_pid)
  if [ -n "$monitor_pid" ]; then
    echo "正在停止监控进程 (PID: $monitor_pid)..."
    kill -TERM "$monitor_pid" 2> /dev/null

    local count=0
    while [ "$count" -lt 5 ] && ps -p "$monitor_pid" > /dev/null 2>&1; do
      sleep 1
      count=$((count + 1))
    done

    if ps -p "$monitor_pid" > /dev/null 2>&1; then
      kill -KILL "$monitor_pid" 2> /dev/null
    fi

    rm -f "$MONITOR_PID_FILE"
    echo "监控进程已停止"
  fi
}

# Print the scheduler state: PID, memory usage, process info, restart
# count, the last lines of today's log and the monitor state.
show_status() {
  if is_running; then
    local pid memory_mb
    pid=$(get_pid)
    memory_mb=$(check_memory)
    echo "调度器正在运行 (PID: $pid)"
    echo "内存使用: ${memory_mb}MB / ${MAX_MEMORY_MB}MB"
    echo "进程信息:"
    ps -p "$pid" -o pid,ppid,cmd,etime
    echo ""

    # Restart counter (only present once monitor mode has been used).
    if [ -f "$RESTART_COUNT_FILE" ]; then
      local restart_count
      restart_count=$(cat "$RESTART_COUNT_FILE")
      echo "重启次数: $restart_count / $MAX_RESTARTS"
    fi

    echo ""
    echo "最近的日志:"
    local today_log
    today_log="$LOG_DIR/scheduler_$(date +%Y%m%d).log"
    if [ -f "$today_log" ]; then
      tail -5 "$today_log"
    else
      echo "未找到今日日志文件"
    fi

    # Monitor process state.
    if [ -f "$MONITOR_PID_FILE" ]; then
      local monitor_pid
      monitor_pid=$(get_monitor_pid)
      if ps -p "$monitor_pid" > /dev/null 2>&1; then
        echo ""
        echo "监控进程正在运行 (PID: $monitor_pid)"
      fi
    fi
  else
    echo "调度器未运行"
  fi
}

# Restart the scheduler: stop, pause 2 seconds, start.
restart_scheduler() {
  echo "重启调度器..."
  stop_scheduler
  sleep 2
  start_scheduler
}

# Start monitor mode: a background loop that restarts the scheduler when
# it is not running (at most MAX_RESTARTS times) or when its memory usage
# exceeds MAX_MEMORY_MB. The loop's output goes to $LOG_DIR/monitor.log.
# Returns 1 when a monitor process is already running.
start_monitor() {
  if [ -f "$MONITOR_PID_FILE" ] \
    && ps -p "$(get_monitor_pid)" > /dev/null 2>&1; then
    echo "监控进程已经在运行中"
    return 1
  fi

  echo "启动监控模式..."
  echo "监控进程将自动重启崩溃的调度器"

  # Reset the restart counter.
  reset_restart_count

  # Launch the monitor loop in a background subshell.
  (
    while true; do
      if ! is_running; then
        local restart_count
        restart_count=$(record_restart)
        echo "$(date): 调度器已停止,尝试重启 (第${restart_count}次)"

        if [ "$restart_count" -le "$MAX_RESTARTS" ]; then
          start_scheduler
          if is_running; then
            echo "$(date): 调度器重启成功"
            sleep 30 # wait 30 seconds before the next check
          else
            echo "$(date): 调度器重启失败"
            sleep 60 # wait 1 minute before retrying
          fi
        else
          echo "$(date): 达到最大重启次数 ($MAX_RESTARTS),停止监控"
          # The monitor is about to exit: remove its PID file so a later
          # "stop" cannot signal an unrelated process that reused the PID.
          rm -f "$MONITOR_PID_FILE"
          break
        fi
      else
        # Scheduler is alive: check its memory usage.
        local memory_mb
        memory_mb=$(check_memory)
        if [ "$memory_mb" -gt "$MAX_MEMORY_MB" ]; then
          echo "$(date): 内存使用过高 (${memory_mb}MB > ${MAX_MEMORY_MB}MB),重启调度器"
          stop_scheduler
          sleep 5
          start_scheduler
        fi

        sleep 30 # check every 30 seconds
      fi
    done
  ) > "$LOG_DIR/monitor.log" 2>&1 &

  local monitor_pid=$!
  echo "$monitor_pid" > "$MONITOR_PID_FILE"
  echo "监控进程已启动 (PID: $monitor_pid)"
  echo "监控日志: tail -f $LOG_DIR/monitor.log"
}

# Stop monitor mode (the scheduler itself is left untouched).
stop_monitor_mode() {
  stop_monitor
  echo "监控模式已停止"
}

# Main dispatch.
case "${1:-start}" in
  start)
    start_scheduler
    ;;
  stop)
    # Stop the monitor first; otherwise it can notice the scheduler going
    # down and restart it before the monitor itself is stopped.
    stop_monitor
    # "stop" has always exited 0, even when the scheduler was not running.
    stop_scheduler || true
    ;;
  status)
    show_status
    ;;
  restart)
    restart_scheduler
    ;;
  monitor)
    start_monitor
    ;;
  stop-monitor)
    stop_monitor_mode
    ;;
  cache-status)
    echo "查看缓存状态..."
    python3 cache_manager.py status
    ;;
  cache-clean)
    echo "清理缓存文件..."
    python3 cache_manager.py clean
    ;;
  cache-cleanup)
    echo "清理过期缓存文件..."
    python3 cache_manager.py cleanup
    ;;
  *)
    echo "用法: $0 {start|stop|status|restart|monitor|stop-monitor|cache-status|cache-clean|cache-cleanup}"
    echo ""
    echo "命令说明:"
    echo "  start        - 启动调度器"
    echo "  stop         - 停止调度器和监控"
    echo "  status       - 查看运行状态"
    echo "  restart      - 重启调度器"
    echo "  monitor      - 启动监控模式(自动重启)"
    echo "  stop-monitor - 停止监控模式"
    echo "  cache-status - 查看缓存状态"
    echo "  cache-clean  - 清理所有缓存文件"
    echo "  cache-cleanup- 清理过期缓存文件"
    echo ""
    echo "监控模式特性:"
    echo "  - 自动检测进程崩溃并重启"
    echo "  - 内存使用监控(超过${MAX_MEMORY_MB}MB自动重启)"
    echo "  - 最大重启次数限制: $MAX_RESTARTS"
    echo ""
    echo "示例:"
    echo "  $0 start          # 启动"
    echo "  $0 monitor        # 启动监控模式"
    echo "  $0 status         # 查看状态"
    echo "  $0 cache-status   # 查看缓存状态"
    echo "  $0 cache-cleanup  # 清理过期缓存"
    echo "  $0 stop           # 停止所有"
    exit 1
    ;;
esac