start_service.sh 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. #!/bin/bash
  # Management script for the Agent service (start / stop / restart / status / logs).

  # ---- Service settings -------------------------------------------------------
  # PORT is the TCP port the service is expected to listen on.
  PORT=8080
  SERVICE_NAME='knowledge-agent'
  # The log path is relative to the directory the script is invoked from.
  LOG_FILE='logs/agent.log'
  # The PID file name is derived from the service name.
  PID_FILE="/tmp/$SERVICE_NAME.pid"

  # Make sure the log directory exists before anything tries to write to it.
  mkdir -p logs
  # Print the usage line, the list of sub-commands and a few invocation
  # examples to stdout. Takes no arguments.
  show_help() {
    # One printf call emits every line; each argument becomes one output line.
    printf '%s\n' \
      "Usage: $0 {start|stop|restart|status|logs}" \
      "" \
      "Commands:" \
      " start - 启动服务" \
      " stop - 停止服务" \
      " restart - 重启服务" \
      " status - 查看服务状态" \
      " logs - 查看实时日志" \
      "" \
      "Examples:" \
      " $0 start # 启动服务" \
      " $0 stop # 停止服务" \
      " $0 logs # 查看实时日志" \
      " tail -f logs/agent.log # 也可以直接使用 tail 命令"
  }
  # Check whether the service process recorded in the PID file is alive.
  #
  # Globals:  PID_FILE (read; the file is removed when it is stale or invalid)
  # Returns:  0 if the PID file holds a single numeric PID of a live process,
  #           1 otherwise.
  is_running() {
    local pid=""

    [ -f "$PID_FILE" ] || return 1

    # Only the first line is considered; `read` trims surrounding whitespace.
    # A file without a trailing newline makes `read` return non-zero while
    # still filling the variable, hence the `|| true`.
    { read -r pid < "$PID_FILE"; } 2>/dev/null || true

    # Anything that is not a plain decimal number is treated as a stale file.
    # Without this check a value such as "1,999" would be interpreted by
    # `ps -p` as a PID list and could report an unrelated process as "running".
    case "$pid" in
      '' | *[!0-9]*)
        rm -f "$PID_FILE"
        return 1
        ;;
    esac

    if ps -p "$pid" > /dev/null 2>&1; then
      return 0
    fi

    # The recorded process is gone: drop the stale PID file.
    rm -f "$PID_FILE"
    return 1
  }
  # Delete *.log files under ./logs whose modification time is more than
  # 20 days old and report what was removed.
  #
  # Outputs:  progress messages and the list of removed files on stdout.
  # Returns:  0 (also when the logs directory does not exist).
  cleanup_old_logs() {
    local removed=""

    echo "🧹 清理旧日志文件(保留最近20天)..."
    [ -d "logs" ] || return 0

    # A single traversal both deletes and reports. `-print` comes after
    # `-delete`, so only files that were actually removed are listed; running
    # find twice (list first, delete later) could report a different set than
    # the one deleted. Errors are silenced on purpose: cleanup is best-effort.
    removed=$(find logs/ -name "*.log" -type f -mtime +20 -delete -print 2>/dev/null)

    if [ -n "$removed" ]; then
      echo "📋 将删除以下旧日志文件:"
      echo "$removed"
      echo "✅ 旧日志文件已清理"
    else
      echo "✅ 没有超过20天的日志文件"
    fi
  }
  # Start the service in the background and record its PID.
  #
  # Steps: refuse to start when an instance is already running, prune old
  # logs, verify python3 and the required Python packages, export the runtime
  # environment, launch agent.py with nohup, then confirm after two seconds
  # that the process is still alive.
  #
  # Globals:  SERVICE_NAME, PORT, LOG_FILE, PID_FILE (read)
  #           LANGCHAIN_API_KEY (read from the caller's environment)
  # Exits:    the whole script with status 1 when the service is already
  #           running, a prerequisite is missing, or the process dies right
  #           after launch.
  start_service() {
    echo "🚀 启动 ${SERVICE_NAME} 服务..."

    # Check for a running instance first, so that a refused start neither
    # deletes log files of the live service nor wastes time probing packages.
    if is_running; then
      echo "⚠️ 警告: 服务已经在运行中 (PID: $(cat "$PID_FILE"))"
      echo "如需重启,请使用: $0 restart"
      exit 1
    fi

    # Prune old logs.
    cleanup_old_logs

    # Python interpreter.
    if ! command -v python3 > /dev/null 2>&1; then
      echo "❌ 错误: 未找到 python3 命令"
      exit 1
    fi

    # Mandatory packages.
    echo "📦 检查依赖..."
    if ! python3 -c "import fastapi, uvicorn" 2>/dev/null; then
      echo "❌ 错误: 缺少必要的依赖包"
      echo "请运行: pip install -r requirements.txt"
      exit 1
    fi

    echo "🔍 检查 httpx..."
    if ! python3 -c "import httpx" 2>/dev/null; then
      echo "❌ 错误: 缺少 httpx 依赖包"
      echo "请运行: pip install httpx"
      exit 1
    fi

    # LangGraph is optional: only warn when it is missing.
    echo "🔍 检查 LangGraph..."
    if ! python3 -c "import langgraph" 2>/dev/null; then
      echo "⚠️ 警告: LangGraph 未安装,将使用传统模式"
      echo "如需启用 LangGraph,请运行: pip install langgraph"
      echo ""
    fi

    echo "🌟 启动服务..."
    echo "📍 服务地址: http://localhost:${PORT}"
    echo "📚 API文档: http://localhost:${PORT}/docs"
    echo "🔍 健康检查: http://localhost:${PORT}/health"
    echo "📝 日志文件: ${LOG_FILE}"
    echo ""

    # Hot reload is disabled in production to reduce log noise.
    export RELOAD_ENABLED=false
    export LOG_LEVEL=info
    export LANGCHAIN_PROJECT=knowledge-agent

    # SECURITY: the API key must come from the caller's environment (or a
    # secret store), never from this file. A key that was previously committed
    # in this script has to be considered leaked and should be revoked.
    # Tracing is only enabled when a key is available.
    if [ -n "${LANGCHAIN_API_KEY:-}" ]; then
      export LANGCHAIN_API_KEY
      export LANGCHAIN_TRACING_V2=true
    else
      echo "⚠️ 警告: 未设置 LANGCHAIN_API_KEY,已禁用 LangChain 追踪 (LANGCHAIN_TRACING_V2=false)"
      export LANGCHAIN_TRACING_V2=false
    fi

    # Launch in the background, appending stdout and stderr to the log file,
    # and remember the PID of the background job.
    nohup python3 agent.py >> "$LOG_FILE" 2>&1 &
    echo "$!" > "$PID_FILE"

    # Give the process a moment, then verify it did not exit immediately.
    sleep 2
    if is_running; then
      echo "✅ 服务启动成功! (PID: $(cat "$PID_FILE"))"
      echo "📝 查看日志: $0 logs 或 tail -f ${LOG_FILE}"
      echo "🛑 停止服务: $0 stop"
    else
      echo "❌ 服务启动失败,请检查日志: ${LOG_FILE}"
      rm -f "$PID_FILE"
      exit 1
    fi
  }
  # List processes whose command line looks like it belongs to the agent.
  #
  # Output:   one line per match on stdout: "PID PPID COMMAND...".
  # Excluded: this script, its sub-shells and its parent process. Their
  #           command lines can contain "knowledge-agent" (for example when
  #           the script lives in a directory of that name), and killing them
  #           would abort the stop procedure half-way.
  # NOTE(review): the multiprocessing.* alternatives match Python
  # multiprocessing helpers of ANY application on the host, not only the
  # agent's - confirm this breadth is intended.
  _list_agent_processes() {
    local pattern='(python.*agent\.py|uvicorn.*agent|knowledge-agent|agent\.py|multiprocessing\.spawn|multiprocessing\.resource_tracker)'
    local snapshot

    # Take the snapshot before the filter pipeline starts, so that the awk and
    # grep processes below (whose arguments contain the pattern) are not in it.
    snapshot=$(ps -A -o pid= -o ppid= -o args= 2>/dev/null) || return 0

    printf '%s\n' "$snapshot" \
      | awk -v me="$$" -v shell_pid="${BASHPID:-$$}" -v parent_pid="$PPID" '
          $1 == me || $1 == shell_pid || $1 == parent_pid { next }
          $2 == me || $2 == shell_pid { next }
          { print }' \
      | grep -E -- "$pattern" || true
  }

  # Stop the service and clean up anything it may have left behind.
  #
  # 1. Stop the process recorded in the PID file: SIGTERM, wait up to 15 s,
  #    then SIGKILL.
  # 2. Sweep remaining agent-looking processes: SIGTERM, wait 3 s, then
  #    SIGKILL, and report honestly whether any survived.
  # 3. Kill whatever is still LISTENING on the service port.
  #
  # Globals:  SERVICE_NAME, PID_FILE, PORT (read)
  # Returns:  0
  stop_service() {
    local pid i leftovers port_pids

    echo "🛑 停止 ${SERVICE_NAME} 服务..."
    if is_running; then
      pid=$(cat "$PID_FILE")
      echo "正在停止进程 PID: $pid"
      # Ask politely first.
      kill "$pid" 2>/dev/null
      for i in {1..15}; do
        if ! ps -p "$pid" > /dev/null 2>&1; then
          break
        fi
        echo "等待进程结束... ($i/15)"
        sleep 1
      done
      # Still alive after the grace period: force it.
      if ps -p "$pid" > /dev/null 2>&1; then
        echo "强制停止进程..."
        kill -9 "$pid" 2>/dev/null
      fi
      rm -f "$PID_FILE"
      echo "✅ 主进程已停止"
    else
      echo "ℹ️ 主进程未运行"
    fi

    # Extra sweep for worker / helper processes.
    echo "🔍 检查是否有残留进程..."
    leftovers=$(_list_agent_processes)
    if [ -n "$leftovers" ]; then
      echo "⚠️ 发现残留进程,正在清理..."
      echo "📋 找到的进程:"
      printf '%s\n' "$leftovers"

      # Graceful termination first.
      while read -r pid _; do
        echo "优雅终止进程 $pid..."
        kill -TERM "$pid" 2>/dev/null
      done <<< "$leftovers"
      sleep 3

      leftovers=$(_list_agent_processes)
      if [ -n "$leftovers" ]; then
        echo "强制终止顽固进程..."
        while read -r pid _; do
          echo "强制终止进程 $pid..."
          kill -9 "$pid" 2>/dev/null
        done <<< "$leftovers"
        sleep 2
        leftovers=$(_list_agent_processes)
      fi

      # Only claim success when nothing is left.
      if [ -n "$leftovers" ]; then
        echo "⚠️ 仍有进程无法终止,请手动检查:"
        printf '%s\n' "$leftovers"
      else
        echo "✅ 残留进程已清理"
      fi
    else
      echo "✅ 没有发现残留进程"
    fi

    # Port check.
    echo "🔍 检查端口${PORT}占用情况..."
    if ! command -v lsof > /dev/null 2>&1; then
      echo "⚠️ 未找到 lsof 命令,跳过端口检查"
      return 0
    fi

    # Restrict the query to LISTENING TCP sockets. A bare `lsof -i:PORT` also
    # returns clients that merely hold a connection to the port (for example a
    # browser), and those must not be killed.
    port_pids=$(lsof -t -iTCP:"${PORT}" -sTCP:LISTEN 2>/dev/null)
    if [ -n "$port_pids" ]; then
      echo "⚠️ 端口${PORT}仍被占用,进程ID: $port_pids"
      echo "强制终止占用端口的进程..."
      while read -r pid; do
        [ -n "$pid" ] && kill -9 "$pid" 2>/dev/null
      done <<< "$port_pids"
      sleep 1
      # Verify instead of assuming the kill worked.
      if [ -n "$(lsof -t -iTCP:"${PORT}" -sTCP:LISTEN 2>/dev/null)" ]; then
        echo "⚠️ 端口${PORT}仍被占用,请手动检查"
      else
        echo "✅ 端口已释放"
      fi
    else
      echo "✅ 端口${PORT}已释放"
    fi
    return 0
  }
  # Restart the service: stop it, pause for two seconds, then start it again.
  # The return status is that of start_service.
  restart_service() {
    printf '%s\n' "🔄 重启 ${SERVICE_NAME} 服务..."

    stop_service
    sleep 2   # short pause between stopping and starting

    start_service
  }
  # Report whether the service is running. When it is, also print its PID,
  # port, log file, URL and the `ps` details of the process.
  #
  # Globals:  SERVICE_NAME, PID_FILE, PORT, LOG_FILE (read)
  show_status() {
    printf '%s\n' "🔍 ${SERVICE_NAME} 服务状态:"

    # Not running: print the hint and leave early.
    if ! is_running; then
      printf '%s\n' \
        "❌ 状态: 未运行" \
        "💡 启动服务: $0 start"
      return
    fi

    pid=$(cat "$PID_FILE")
    printf '%s\n' \
      "✅ 状态: 运行中" \
      "📊 PID: $pid" \
      "📍 端口: $PORT" \
      "📝 日志: $LOG_FILE" \
      "🔗 地址: http://localhost:${PORT}" \
      "" \
      "📋 进程信息:"

    # Process details for the recorded PID.
    ps -p "$pid" -o pid,ppid,cmd,etime
  }
  # Follow the service log with `tail -f` until interrupted.
  #
  # Globals:  LOG_FILE (read)
  # Exits:    the script with status 1 when the log file does not exist.
  show_logs() {
    printf '%s\n' \
      "📝 查看实时日志 (按 Ctrl+C 退出)..." \
      "日志文件: ${LOG_FILE}" \
      ""

    # Nothing to follow yet: tell the user how to get a log and stop.
    if [ ! -f "$LOG_FILE" ]; then
      printf '%s\n' \
        "❌ 日志文件不存在: ${LOG_FILE}" \
        "请先启动服务: $0 start"
      exit 1
    fi

    tail -f "$LOG_FILE"
  }
  # Entry point: dispatch on the first command-line argument.
  case "${1-}" in
    start | stop | restart)
      # These sub-commands map directly onto start_service, stop_service and
      # restart_service.
      "${1}_service"
      ;;
    status) show_status ;;
    logs) show_logs ;;
    *)
      # Unknown or missing sub-command: print usage and fail.
      show_help
      exit 1
      ;;
  esac