123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
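#!/bin/bash
# Launcher script for the multi-threaded scheduler.
# Usage:
#   ./start_scheduler.sh start          # start the scheduler (default when no command is given)
#   ./start_scheduler.sh stop           # stop the scheduler and the monitor
#   ./start_scheduler.sh status         # show the current status
#   ./start_scheduler.sh restart        # restart the scheduler
#   ./start_scheduler.sh monitor        # monitor mode (automatic restart)
#   ./start_scheduler.sh stop-monitor   # stop monitor mode
#   ./start_scheduler.sh cache-status   # show the cache status
#   ./start_scheduler.sh cache-clean    # remove all cache files
#   ./start_scheduler.sh cache-cleanup  # remove expired cache files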
# Script-wide configuration. All paths are relative to the directory the
# script is invoked from. The values are never reassigned in this file, so
# they are declared readonly to catch accidental overwrites.
readonly SCRIPT_NAME="multi_thread_scheduler.py"   # Python entry point that gets launched
readonly PID_FILE="scheduler.pid"                  # holds the PID of the running scheduler
readonly LOG_DIR="logs"                            # directory for stdout and monitor logs
readonly MONITOR_PID_FILE="monitor.pid"            # holds the PID of the monitor loop
readonly MAX_MEMORY_MB=2048                        # maximum memory usage (MB)
readonly RESTART_COUNT_FILE="restart_count.txt"    # persists the restart counter
readonly MAX_RESTARTS=10                           # maximum number of restarts
# Pre-flight checks, run on every invocation before any command is dispatched.
# Diagnostics go to stderr so they are not mistaken for regular output.

# Check the Python environment.
if ! command -v python3 > /dev/null 2>&1; then
  echo "错误: 未找到Python3,请先安装Python3" >&2
  exit 1
fi

# Check that the required files exist in the current directory. The scheduler
# entry point is taken from $SCRIPT_NAME so this check cannot drift away from
# the file that is actually launched.
for required_file in "indentify.py" "$SCRIPT_NAME"; do
  if [ ! -f "$required_file" ]; then
    echo "错误: 未找到${required_file}文件" >&2
    exit 1
  fi
done
unset required_file

# Create the log directory ($LOG_DIR rather than a second hard-coded "logs").
if ! mkdir -p "$LOG_DIR"; then
  echo "错误: 无法创建日志目录 $LOG_DIR" >&2
  exit 1
fi
#######################################
# Print the scheduler PID recorded in the PID file.
# Globals:   PID_FILE (read)
# Outputs:   the PID on stdout, or an empty line when the file is missing or
#            does not start with a positive integer
# Returns:   0
#######################################
get_pid() {
  local recorded=""
  if [ -f "$PID_FILE" ]; then
    # Only the first whitespace-delimited field of the first line is used.
    # read fails on an empty file or a missing final newline; that is fine.
    { read -r recorded _ < "$PID_FILE"; } 2> /dev/null || true
  fi
  # The value ends up as an argument to kill/ps. Anything that is not a plain
  # positive PID is rejected: for kill, "0" and negative numbers address
  # process groups or every process the user owns.
  if ! [[ "$recorded" =~ ^[1-9][0-9]*$ ]]; then
    recorded=""
  fi
  echo "$recorded"
}
#######################################
# Print the monitor-loop PID recorded in the monitor PID file.
# Globals:   MONITOR_PID_FILE (read)
# Outputs:   the PID on stdout, or an empty line when the file is missing or
#            does not start with a positive integer
# Returns:   0
#######################################
get_monitor_pid() {
  local recorded=""
  if [ -f "$MONITOR_PID_FILE" ]; then
    # Only the first whitespace-delimited field of the first line is used.
    # read fails on an empty file or a missing final newline; that is fine.
    { read -r recorded _ < "$MONITOR_PID_FILE"; } 2> /dev/null || true
  fi
  # The value ends up as an argument to kill/ps. Anything that is not a plain
  # positive PID is rejected: for kill, "0" and negative numbers address
  # process groups or every process the user owns.
  if ! [[ "$recorded" =~ ^[1-9][0-9]*$ ]]; then
    recorded=""
  fi
  echo "$recorded"
}
#######################################
# Check whether the process recorded in the PID file is alive.
# Globals:   PID_FILE (removed when the recorded PID is invalid or dead)
# Returns:   0 if the recorded process exists, 1 otherwise
#######################################
is_running() {
  # Declaration and assignment are split so `local` does not mask the
  # exit status of the command substitution.
  local pid
  pid=$(get_pid)

  # Nothing recorded: not running, and there is nothing to clean up.
  [ -n "$pid" ] || return 1

  # `ps -p` also accepts PID lists ("1,123"), which would report success for
  # an unrelated process, so only a single positive integer is accepted.
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && ps -p "$pid" > /dev/null 2>&1; then
    return 0
  fi

  # The process does not exist (or the record is garbage): drop the PID file.
  rm -f "$PID_FILE"
  return 1
}
#######################################
# Report the resident set size of the scheduler process.
# Outputs:   RSS in whole megabytes on stdout; "0" when no PID is recorded,
#            the process is gone, or ps returns something that is not a number
# Returns:   0
#######################################
check_memory() {
  local pid rss_kb=""
  pid=$(get_pid)

  if [ -n "$pid" ]; then
    # One ps call both checks that the process exists and fetches its RSS
    # (in kilobytes); a dead process simply produces no output.
    rss_kb=$(ps -p "$pid" -o rss= 2> /dev/null | tr -d ' ')
  fi

  # Only a plain decimal number may reach the arithmetic expansion; 10#
  # keeps a leading zero from being read as octal.
  if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
    echo "$((10#$rss_kb / 1024))"
  else
    echo "0"
  fi
}
#######################################
# Increment the persisted restart counter.
# Globals:   RESTART_COUNT_FILE (read and rewritten)
# Outputs:   the new counter value on stdout
# Returns:   0 on success, non-zero if the counter file cannot be written
#######################################
record_restart() {
  local count=0 stored=""

  if [ -f "$RESTART_COUNT_FILE" ]; then
    { read -r stored _ < "$RESTART_COUNT_FILE"; } 2> /dev/null || true
    # The file content goes through arithmetic expansion, which evaluates
    # arbitrary expressions and reads "08" as invalid octal. Only plain
    # decimal digits are accepted (forced to base 10); anything else
    # restarts the count from zero.
    if [[ "$stored" =~ ^[0-9]+$ ]]; then
      count=$((10#$stored))
    fi
  fi

  count=$((count + 1))
  echo "$count" > "$RESTART_COUNT_FILE" || return 1
  echo "$count"
}
# Reset the persisted restart counter by overwriting $RESTART_COUNT_FILE
# with a single line containing 0.
reset_restart_count() {
  printf '%s\n' "0" > "$RESTART_COUNT_FILE"
}
#######################################
# Start the scheduler in the background and record its PID.
# Globals:   SCRIPT_NAME, LOG_DIR, MAX_MEMORY_MB (read); PID_FILE (written);
#            PYTHONMALLOC, PYTHONDEVMODE, PYTHONUNBUFFERED (exported)
# Outputs:   progress and usage hints on stdout
# Returns:   0 if the process is still alive 3 seconds after launch,
#            1 if a scheduler is already running, the log directory cannot
#            be created, or the process exited right after starting
#######################################
start_scheduler() {
  if is_running; then
    echo "调度器已经在运行中 (PID: $(get_pid))"
    return 1
  fi

  echo "正在启动多线程调度器..."
  echo "日志文件将保存在 $LOG_DIR/ 目录中"
  echo "最大内存使用量: ${MAX_MEMORY_MB}MB"

  # This function is also called repeatedly from the long-lived monitor loop,
  # so the log directory may have been removed since the start-up check.
  mkdir -p "$LOG_DIR" || return 1
  local stdout_log="$LOG_DIR/scheduler_stdout.log"

  # Environment for the Python process: C-library malloc as the allocator,
  # Python development mode, and unbuffered stdout/stderr.
  export PYTHONMALLOC=malloc
  export PYTHONDEVMODE=1
  export PYTHONUNBUFFERED=1

  # Append instead of truncating: when the monitor restarts a crashed
  # scheduler, the output explaining the crash must survive the restart.
  # The marker line separates one run from the next.
  echo "===== $(date): 启动 $SCRIPT_NAME =====" >> "$stdout_log"

  # Run the Python script in the background, detached from hang-ups.
  nohup python3 -u "$SCRIPT_NAME" >> "$stdout_log" 2>&1 &
  local pid=$!

  # Save the PID to the PID file.
  echo "$pid" > "$PID_FILE"

  echo "调度器已启动 (PID: $pid)"
  echo "使用以下命令查看状态:"
  echo " ./start_scheduler.sh status"
  echo " tail -f $LOG_DIR/scheduler_*.log"
  echo ""
  echo "使用以下命令停止:"
  echo " ./start_scheduler.sh stop"

  # Give the process a few seconds, then verify that it is still there.
  sleep 3
  if ! ps -p "$pid" > /dev/null 2>&1; then
    echo "警告: 进程启动后立即退出,请检查日志文件"
    rm -f "$PID_FILE"
    return 1
  fi
  return 0
}
- # 停止调度器
- stop_scheduler() {
- local pid=$(get_pid)
- if [ -z "$pid" ]; then
- echo "调度器未运行"
- return 1
- fi
-
- echo "正在停止调度器 (PID: $pid)..."
-
- # 发送SIGTERM信号
- kill -TERM "$pid" 2>/dev/null
-
- # 等待进程结束
- local count=0
- while [ $count -lt 10 ] && ps -p "$pid" > /dev/null 2>&1; do
- sleep 1
- count=$((count + 1))
- echo "等待进程结束... ($count/10)"
- done
-
- # 如果进程仍在运行,强制杀死
- if ps -p "$pid" > /dev/null 2>&1; then
- echo "强制停止进程..."
- kill -KILL "$pid" 2>/dev/null
- fi
-
- # 删除PID文件
- rm -f "$PID_FILE"
- echo "调度器已停止"
- }
- # 停止监控进程
- stop_monitor() {
- local monitor_pid=$(get_monitor_pid)
- if [ -n "$monitor_pid" ]; then
- echo "正在停止监控进程 (PID: $monitor_pid)..."
- kill -TERM "$monitor_pid" 2>/dev/null
-
- local count=0
- while [ $count -lt 5 ] && ps -p "$monitor_pid" > /dev/null 2>&1; do
- sleep 1
- count=$((count + 1))
- done
-
- if ps -p "$monitor_pid" > /dev/null 2>&1; then
- kill -KILL "$monitor_pid" 2>/dev/null
- fi
-
- rm -f "$MONITOR_PID_FILE"
- echo "监控进程已停止"
- fi
- }
#######################################
# Print a status report for the scheduler and the monitor loop.
# Globals:   MAX_MEMORY_MB, MAX_RESTARTS, RESTART_COUNT_FILE, LOG_DIR,
#            MONITOR_PID_FILE (all read)
# Outputs:   the report on stdout
#######################################
show_status() {
  if is_running; then
    local pid memory_mb
    pid=$(get_pid)
    memory_mb=$(check_memory)
    echo "调度器正在运行 (PID: $pid)"
    echo "内存使用: ${memory_mb}MB / ${MAX_MEMORY_MB}MB"
    echo "进程信息:"
    ps -p "$pid" -o pid,ppid,cmd,etime
    echo ""

    # Restart counter, if one has been recorded.
    if [ -f "$RESTART_COUNT_FILE" ]; then
      local restart_count
      restart_count=$(cat "$RESTART_COUNT_FILE")
      echo "重启次数: $restart_count / $MAX_RESTARTS"
    fi

    echo ""
    echo "最近的日志:"
    # The dated file name is built once, so the existence test and the tail
    # cannot disagree if the date changes between them.
    # NOTE(review): this file is not written by this script; presumably the
    # Python scheduler creates it. Confirm.
    local today_log
    today_log="$LOG_DIR/scheduler_$(date +%Y%m%d).log"
    if [ -f "$today_log" ]; then
      tail -n 5 "$today_log"
    else
      echo "未找到今日日志文件"
    fi
  else
    echo "调度器未运行"
  fi

  # Report the monitor whether or not the scheduler is up. While the
  # scheduler is down is when it matters most whether a monitor is still
  # alive and may restart it.
  if [ -f "$MONITOR_PID_FILE" ]; then
    local monitor_pid
    monitor_pid=$(get_monitor_pid)
    if [ -n "$monitor_pid" ] && ps -p "$monitor_pid" > /dev/null 2>&1; then
      echo ""
      echo "监控进程正在运行 (PID: $monitor_pid)"
    fi
  fi
}
# Restart the scheduler: stop it, pause two seconds, then start it again.
# The return status is that of start_scheduler.
restart_scheduler() {
  printf '%s\n' "重启调度器..."
  stop_scheduler
  sleep 2
  start_scheduler
}
#######################################
# Start monitor mode: a background loop that restarts the scheduler when it
# is not running or when its memory use exceeds the limit.
# Globals:   MONITOR_PID_FILE (written; removed by the loop when it gives up),
#            MAX_RESTARTS, MAX_MEMORY_MB, LOG_DIR (read)
# Outputs:   start-up messages on stdout; the loop itself logs to
#            $LOG_DIR/monitor.log
# Returns:   0 once the loop has been launched, 1 if a monitor is already
#            running
#######################################
start_monitor() {
  local existing_pid
  existing_pid=$(get_monitor_pid)
  if [ -f "$MONITOR_PID_FILE" ] && [ -n "$existing_pid" ] \
    && ps -p "$existing_pid" > /dev/null 2>&1; then
    echo "监控进程已经在运行中"
    return 1
  fi

  echo "启动监控模式..."
  echo "监控进程将自动重启崩溃的调度器"

  # The restart counter is reset here, when monitor mode starts.
  reset_restart_count

  # Declared in function scope; the subshell below works on its own copies.
  local restart_count memory_mb

  # The monitor loop runs in a background subshell.
  (
    while true; do
      if ! is_running; then
        restart_count=$(record_restart)
        echo "$(date): 调度器已停止,尝试重启 (第${restart_count}次)"

        if [ "$restart_count" -le "$MAX_RESTARTS" ]; then
          start_scheduler
          if is_running; then
            echo "$(date): 调度器重启成功"
            sleep 30 # wait 30 seconds before the next check
          else
            echo "$(date): 调度器重启失败"
            sleep 60 # wait one minute before trying again
          fi
        else
          echo "$(date): 达到最大重启次数 ($MAX_RESTARTS),停止监控"
          # The loop is about to exit. Drop the PID file so a later stop or
          # status does not act on a PID that no longer belongs to a monitor.
          rm -f "$MONITOR_PID_FILE"
          break
        fi
      else
        # The scheduler is up: check its memory use. An empty reading
        # counts as 0 so the integer test cannot fail.
        memory_mb=$(check_memory)
        if [ "${memory_mb:-0}" -gt "$MAX_MEMORY_MB" ]; then
          echo "$(date): 内存使用过高 (${memory_mb}MB > ${MAX_MEMORY_MB}MB),重启调度器"
          stop_scheduler
          sleep 5
          start_scheduler
        fi

        sleep 30 # poll every 30 seconds
      fi
    done
  ) > "$LOG_DIR/monitor.log" 2>&1 &

  local monitor_pid=$!
  echo "$monitor_pid" > "$MONITOR_PID_FILE"
  echo "监控进程已启动 (PID: $monitor_pid)"
  echo "监控日志: tail -f $LOG_DIR/monitor.log"
}
# Leave monitor mode: stop the monitor loop via stop_monitor, then print a
# confirmation line.
stop_monitor_mode() {
  stop_monitor
  printf '%s\n' "监控模式已停止"
}
#######################################
# Command dispatcher.
# Arguments: $1 - command name; defaults to "start" when omitted
# Outputs:   whatever the selected command prints; usage help for an
#            unknown command
# Returns:   the status of the selected command; `stop` always returns 0;
#            exits with status 1 on an unknown command
#######################################
main() {
  case "${1:-start}" in
    start)
      start_scheduler
      ;;
    stop)
      # Stop the monitor first. If it is still running while the scheduler
      # is being stopped, it can see the scheduler as crashed and start a
      # new one, which then survives the "stop".
      stop_monitor
      stop_scheduler
      # `stop` has always reported success, even when nothing was running.
      return 0
      ;;
    status)
      show_status
      ;;
    restart)
      restart_scheduler
      ;;
    monitor)
      start_monitor
      ;;
    stop-monitor)
      stop_monitor_mode
      ;;
    cache-status)
      echo "查看缓存状态..."
      python3 cache_manager.py status
      ;;
    cache-clean)
      echo "清理缓存文件..."
      python3 cache_manager.py clean
      ;;
    cache-cleanup)
      echo "清理过期缓存文件..."
      python3 cache_manager.py cleanup
      ;;
    *)
      # The usage line lists every accepted command, including the cache-*
      # ones.
      echo "用法: $0 {start|stop|status|restart|monitor|stop-monitor|cache-status|cache-clean|cache-cleanup}"
      echo ""
      echo "命令说明:"
      echo " start - 启动调度器"
      echo " stop - 停止调度器和监控"
      echo " status - 查看运行状态"
      echo " restart - 重启调度器"
      echo " monitor - 启动监控模式(自动重启)"
      echo " stop-monitor - 停止监控模式"
      echo " cache-status - 查看缓存状态"
      echo " cache-clean - 清理所有缓存文件"
      echo " cache-cleanup- 清理过期缓存文件"
      echo ""
      echo "监控模式特性:"
      echo " - 自动检测进程崩溃并重启"
      echo " - 内存使用监控(超过${MAX_MEMORY_MB}MB自动重启)"
      echo " - 最大重启次数限制: $MAX_RESTARTS"
      echo ""
      echo "示例:"
      echo " $0 start # 启动"
      echo " $0 monitor # 启动监控模式"
      echo " $0 status # 查看状态"
      echo " $0 cache-status # 查看缓存状态"
      echo " $0 cache-cleanup # 清理过期缓存"
      echo " $0 stop # 停止所有"
      exit 1
      ;;
  esac
}

main "$@"
|