# task_server.py
import concurrent.futures
import json
import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict

from sqlalchemy import func

from pqai_agent.agents.multimodal_chat_agent import MultiModalChatAgent
from pqai_agent.data_models.agent_configuration import AgentConfiguration
from pqai_agent.data_models.agent_test_task import AgentTestTask
from pqai_agent.data_models.agent_test_task_conversations import AgentTestTaskConversations
from pqai_agent.data_models.service_module import ServiceModule
from pqai_agent.logging import logger
from pqai_agent_server.const.status_enum import (
    TestTaskConversationsStatus,
    TestTaskStatus,
    get_test_task_conversations_status_desc,
    get_test_task_status_desc,
)
from pqai_agent_server.evaluate_agent import evaluate_agent


  17. class TaskManager:
  18. """任务管理器"""
  19. def __init__(self, session_maker, dataset_service):
  20. self.session_maker = session_maker
  21. self.dataset_service = dataset_service
  22. self.task_events = {} # 任务ID -> Event (用于取消任务)
  23. self.task_locks = {} # 任务ID -> Lock (用于任务状态同步)
  24. self.running_tasks = set()
  25. self.executor = ThreadPoolExecutor(max_workers=20, thread_name_prefix='TaskWorker')
  26. self.create_task_executor = ThreadPoolExecutor(max_workers=10, thread_name_prefix='CreateTaskWorker')
  27. self.task_futures = {} # 任务ID -> Future
  28. def get_test_task_list(self, page_num: int, page_size: int) -> Dict:
  29. with self.session_maker() as session:
  30. # 计算偏移量
  31. offset = (page_num - 1) * page_size
  32. # 查询分页数据
  33. result = (session.query(AgentTestTask, AgentConfiguration, ServiceModule)
  34. .outerjoin(AgentConfiguration, AgentTestTask.agent_id == AgentConfiguration.id)
  35. .outerjoin(ServiceModule, AgentTestTask.module_id == ServiceModule.id)
  36. .limit(page_size).offset(offset).all())
  37. # 查询总记录数
  38. total = session.query(func.count(AgentTestTask.id)).scalar()
  39. total_page = total // page_size + 1 if total % page_size > 0 else total // page_size
  40. total_page = 1 if total_page <= 0 else total_page
  41. response_data = [
  42. {
  43. "id": agent_test_task.id,
  44. "agentId": agent_configuration.id,
  45. "agentName": agent_configuration.display_name,
  46. "moduleName": service_module.display_name,
  47. "createUser": agent_test_task.create_user,
  48. "updateUser": agent_test_task.update_user,
  49. "status": agent_test_task.status,
  50. "statusName": get_test_task_status_desc(agent_test_task.status),
  51. "createTime": agent_test_task.create_time.strftime("%Y-%m-%d %H:%M:%S"),
  52. "updateTime": agent_test_task.update_time.strftime("%Y-%m-%d %H:%M:%S")
  53. }
  54. for agent_test_task, agent_configuration, service_module in result
  55. ]
  56. return {
  57. "currentPage": page_num,
  58. "pageSize": page_size,
  59. "totalSize": total_page,
  60. "total": total,
  61. "list": response_data,
  62. }
  63. def get_test_task_conversations(self, task_id: int, page_num: int, page_size: int) -> Dict:
  64. with self.session_maker() as session:
  65. # 计算偏移量
  66. offset = (page_num - 1) * page_size
  67. # 查询分页数据
  68. result = (session.query(AgentTestTaskConversations, AgentConfiguration)
  69. .outerjoin(AgentConfiguration, AgentTestTaskConversations.agent_id == AgentConfiguration.id)
  70. .filter(AgentTestTaskConversations.task_id == task_id)
  71. .limit(page_size).offset(offset).all())
  72. # 查询总记录数
  73. total = session.query(func.count(AgentTestTaskConversations.id)).filter(
  74. AgentTestTaskConversations.task_id == task_id).scalar()
  75. total_page = total // page_size + 1 if total % page_size > 0 else total // page_size
  76. total_page = 1 if total_page <= 0 else total_page
  77. response_data = [
  78. {
  79. "id": agent_test_task_conversation.id,
  80. "datasetId": agent_test_task_conversation.dataset_id,
  81. "conversationId": agent_test_task_conversation.conversation_id,
  82. "input": MultiModalChatAgent.compose_dialogue(json.loads(agent_test_task_conversation.input))
  83. if agent_test_task_conversation.input and agent_test_task_conversation.input.strip()
  84. else None,
  85. "output": agent_test_task_conversation.output,
  86. "score": agent_test_task_conversation.score,
  87. "statusName": get_test_task_conversations_status_desc(agent_test_task_conversation.status),
  88. "createTime": agent_test_task_conversation.create_time.strftime("%Y-%m-%d %H:%M:%S"),
  89. "updateTime": agent_test_task_conversation.update_time.strftime("%Y-%m-%d %H:%M:%S")
  90. }
  91. for agent_test_task_conversation, agent_configuration in result
  92. ]
  93. return {
  94. "currentPage": page_num,
  95. "pageSize": page_size,
  96. "totalSize": total_page,
  97. "total": total,
  98. "list": response_data,
  99. }
  100. def create_task(self, agent_id: int, module_id: int, evaluate_type: int, user: str) -> Dict:
  101. """创建新任务"""
  102. with self.session_maker() as session:
  103. agent_test_task = AgentTestTask(agent_id=agent_id, module_id=module_id, evaluate_type=evaluate_type,
  104. status=TestTaskStatus.CREATING.value, create_user=user, update_user=user)
  105. session.add(agent_test_task)
  106. session.commit() # 显式提交
  107. task_id = agent_test_task.id
  108. # 异步执行创建任务
  109. self.create_task_executor.submit(self._generate_agent_test_task_conversation_batch, task_id, agent_id,
  110. module_id)
  111. return self.get_task(task_id)
  112. def _generate_agent_test_task_conversation_batch(self, task_id: int, agent_id: int, module_id: int):
  113. """异步生成子任务"""
  114. try:
  115. # 获取数据集列表
  116. dataset_module_list = self.dataset_service.get_dataset_module_list_by_module(module_id)
  117. # 批量处理数据集 - 减少数据库交互
  118. batch_size = 100 # 每批处理100个子任务
  119. agent_test_task_conversation_batch = []
  120. for dataset_module in dataset_module_list:
  121. # 获取对话数据列表
  122. conversation_datas = self.dataset_service.get_conversation_data_list_by_dataset(
  123. dataset_module.dataset_id)
  124. for conversation_data in conversation_datas:
  125. # 创建子任务对象
  126. agent_test_task_conversation = AgentTestTaskConversations(
  127. task_id=task_id,
  128. agent_id=agent_id,
  129. dataset_id=dataset_module.dataset_id,
  130. conversation_id=conversation_data.id,
  131. status=TestTaskConversationsStatus.PENDING.value
  132. )
  133. agent_test_task_conversation_batch.append(agent_test_task_conversation)
  134. # 批量提交
  135. if len(agent_test_task_conversation_batch) >= batch_size:
  136. self.save_agent_test_task_conversation_batch(agent_test_task_conversation_batch)
  137. agent_test_task_conversation_batch = []
  138. # 提交剩余的子任务
  139. if agent_test_task_conversation_batch:
  140. self.save_agent_test_task_conversation_batch(agent_test_task_conversation_batch)
  141. # 更新主任务状态为未开始
  142. self.update_task_status(task_id, TestTaskStatus.NOT_STARTED.value)
  143. # 自动提交任务执行
  144. self._execute_task(task_id)
  145. except Exception as e:
  146. logger.error(f"生成子任务失败: {str(e)}")
  147. # 更新任务状态为失败
  148. self.update_task_status(task_id, TestTaskStatus.CREATED_FAIL.value)
  149. def save_agent_test_task_conversation_batch(self, agent_test_task_conversation_batch: list):
  150. """批量保存子任务到数据库"""
  151. try:
  152. with self.session_maker() as session:
  153. with session.begin():
  154. session.add_all(agent_test_task_conversation_batch)
  155. except Exception as e:
  156. logger.error(e)
  157. def get_agent_configuration_by_task_id(self, task_id: int):
  158. """获取指定任务ID对应的Agent配置信息"""
  159. with self.session_maker() as session:
  160. return session.query(AgentConfiguration) \
  161. .join(AgentTestTask, AgentTestTask.agent_id == AgentConfiguration.id) \
  162. .filter(AgentTestTask.id == task_id) \
  163. .one_or_none() # 返回单个对象或None(如果未找到)
  164. def get_service_module_by_task_id(self, task_id: int):
  165. """获取指定任务ID对应的Agent配置信息"""
  166. with self.session_maker() as session:
  167. return session.query(ServiceModule) \
  168. .join(AgentTestTask, AgentTestTask.module_id == ServiceModule.id) \
  169. .filter(AgentTestTask.id == task_id) \
  170. .one_or_none() # 返回单个对象或None(如果未找到)
  171. def get_task(self, task_id: int):
  172. """获取任务信息"""
  173. with self.session_maker() as session:
  174. return session.query(AgentTestTask).filter(AgentTestTask.id == task_id).one()
  175. def get_in_progress_task(self):
  176. """获取执行中任务"""
  177. with self.session_maker() as session:
  178. return session.query(AgentTestTask).filter(AgentTestTask.status.in_([
  179. TestTaskStatus.NOT_STARTED.value,
  180. TestTaskStatus.IN_PROGRESS.value
  181. ])).all()
  182. def get_creating_task(self):
  183. """获取执行中任务"""
  184. with self.session_maker() as session:
  185. return session.query(AgentTestTask).filter(AgentTestTask.status == TestTaskStatus.CREATING.value).all()
  186. def get_task_conversations(self, task_id: int):
  187. """获取任务的所有子任务"""
  188. with self.session_maker() as session:
  189. return session.query(AgentTestTaskConversations).filter(AgentTestTaskConversations.task_id == task_id).all()
  190. def del_task_conversations(self, task_id: int):
  191. with self.session_maker() as session:
  192. session.query(AgentTestTaskConversations).filter(AgentTestTaskConversations.task_id == task_id).delete()
  193. # 提交事务生效
  194. session.commit()
  195. def get_pending_task_conversations(self, task_id: int):
  196. """获取待处理的子任务"""
  197. with self.session_maker() as session:
  198. return session.query(AgentTestTaskConversations).filter(
  199. AgentTestTaskConversations.task_id == task_id).filter(
  200. AgentTestTaskConversations.status.in_([
  201. TestTaskConversationsStatus.PENDING.value,
  202. TestTaskConversationsStatus.RUNNING.value
  203. ])).all()
  204. def update_task_status(self, task_id: int, status: int):
  205. """更新任务状态"""
  206. with self.session_maker() as session:
  207. session.query(AgentTestTask).filter(AgentTestTask.id == task_id).update(
  208. {"status": status, "update_time": datetime.now()})
  209. session.commit()
  210. def update_task_conversations_status(self, task_conversations_id: int, status: int):
  211. """更新子任务状态"""
  212. with self.session_maker() as session:
  213. session.query(AgentTestTaskConversations).filter(
  214. AgentTestTaskConversations.id == task_conversations_id).update(
  215. {"status": status, "update_time": datetime.now()})
  216. session.commit()
  217. def update_task_conversations_res(self, task_conversations_id: int, status: int, input: str, output: str,
  218. score: str):
  219. """更新子任务结果"""
  220. with self.session_maker() as session:
  221. session.query(AgentTestTaskConversations).filter(
  222. AgentTestTaskConversations.id == task_conversations_id).update(
  223. {"status": status, "input": input, "output": output, "score": score, "update_time": datetime.now()})
  224. session.commit()
  225. def cancel_task(self, task_id: int, user: str):
  226. """取消任务(带事务支持)"""
  227. # 设置取消事件
  228. # 1. 设置取消事件(通知任务内部)
  229. if task_id in self.task_events:
  230. self.task_events[task_id].set()
  231. # 如果任务正在执行,尝试取消Future
  232. if task_id in self.task_futures:
  233. self.task_futures[task_id].cancel()
  234. with self.session_maker() as session:
  235. with session.begin():
  236. session.query(AgentTestTask).filter(AgentTestTask.id == task_id).update(
  237. {"status": TestTaskStatus.CANCELLED.value, "update_user": user, "update_time": datetime.now()})
  238. session.query(AgentTestTaskConversations).filter(AgentTestTaskConversations.task_id == task_id).filter(
  239. AgentTestTaskConversations.status == TestTaskConversationsStatus.PENDING.value).update(
  240. {"status": TestTaskConversationsStatus.CANCELLED.value, "update_time": datetime.now()})
  241. session.commit()
  242. self._cleanup_task_resources(task_id)
  243. def resume_task(self, task_id: int, user: str) -> bool:
  244. """恢复已取消的任务"""
  245. task = self.get_task(task_id)
  246. if not task or task.status != TestTaskStatus.CANCELLED.value:
  247. return False
  248. with self.session_maker() as session:
  249. with session.begin():
  250. session.query(AgentTestTask).filter(AgentTestTask.id == task_id).update(
  251. {"status": TestTaskStatus.NOT_STARTED.value, "update_user": user, "update_time": datetime.now()})
  252. session.query(AgentTestTaskConversations).filter(AgentTestTaskConversations.task_id == task_id).filter(
  253. AgentTestTaskConversations.status == TestTaskConversationsStatus.CANCELLED.value).update(
  254. {"status": TestTaskConversationsStatus.PENDING.value, "update_time": datetime.now()})
  255. session.commit()
  256. # 重新执行任务
  257. self._execute_task(task_id)
  258. logger.info(f"Resumed task {task_id}")
  259. return True
  260. def recover_tasks(self):
  261. """服务启动时恢复未完成的任务"""
  262. creating = self.get_creating_task()
  263. for task in creating:
  264. task_id = task.id
  265. agent_id = task.agent_id
  266. module_id = task.module_id
  267. self.del_task_conversations(task_id)
  268. # 重新提交任务
  269. # 异步执行创建任务
  270. self.create_task_executor.submit(self._generate_agent_test_task_conversation_batch, task_id, agent_id,
  271. module_id)
  272. # 获取所有进行中的任务ID(根据实际状态定义查询)
  273. in_progress_tasks = self.get_in_progress_task()
  274. for task in in_progress_tasks:
  275. task_id = task.id
  276. # 重新提交任务
  277. self._execute_task(task_id)
  278. def _execute_task(self, task_id: int):
  279. """提交任务到线程池执行"""
  280. # 确保任务状态一致性
  281. if task_id in self.running_tasks:
  282. return
  283. # 创建任务事件和锁
  284. if task_id not in self.task_events:
  285. self.task_events[task_id] = threading.Event()
  286. if task_id not in self.task_locks:
  287. self.task_locks[task_id] = threading.Lock()
  288. # 提交到线程池
  289. future = self.executor.submit(self._process_task, task_id)
  290. self.task_futures[task_id] = future
  291. # 标记任务为运行中
  292. with self.task_locks[task_id]:
  293. self.running_tasks.add(task_id)
  294. def _process_task(self, task_id: int):
  295. """处理任务的所有子任务(并发执行)"""
  296. try:
  297. self.update_task_status(task_id, TestTaskStatus.IN_PROGRESS.value)
  298. task_conversations = self.get_pending_task_conversations(task_id)
  299. if not task_conversations:
  300. self.update_task_status(task_id, TestTaskStatus.COMPLETED.value)
  301. return
  302. agent_configuration = self.get_agent_configuration_by_task_id(task_id)
  303. query_prompt_template = agent_configuration.task_prompt
  304. task = self.get_task(task_id)
  305. # 使用线程池执行子任务
  306. with ThreadPoolExecutor(max_workers=8) as executor: # 可根据需要调整并发数
  307. futures = {}
  308. for task_conversation in task_conversations:
  309. # 提交子任务到线程池
  310. future = executor.submit(
  311. self._process_single_conversation,
  312. task_id,
  313. task,
  314. task_conversation,
  315. query_prompt_template,
  316. agent_configuration
  317. )
  318. futures[future] = task_conversation.id
  319. # 等待所有子任务完成或取消
  320. for future in concurrent.futures.as_completed(futures):
  321. conv_id = futures[future]
  322. try:
  323. future.result() # 获取结果(如有异常会在此抛出)
  324. except Exception as e:
  325. logger.error(f"Subtask {conv_id} failed: {str(e)}")
  326. self.update_task_conversations_status(
  327. conv_id,
  328. TestTaskConversationsStatus.FAILED.value
  329. )
  330. # 检查最终任务状态
  331. self._update_final_task_status(task_id)
  332. except Exception as e:
  333. logger.error(f"Error processing task {task_id}: {str(e)}")
  334. self.update_task_status(task_id, TestTaskStatus.FAILED.value)
  335. finally:
  336. self._cleanup_task_resources(task_id)
  337. def _process_single_conversation(self, task_id, task, task_conversation, query_prompt_template,
  338. agent_configuration):
  339. """处理单个对话子任务(线程安全)"""
  340. # 获取锁(避免竞态条件)
  341. task_lock = self.task_locks.get(task_id, threading.Lock())
  342. with task_lock:
  343. # 检查任务是否被取消或不存在
  344. if task_id not in self.task_events:
  345. logger.warning(f"Task {task_id} not found in task_events")
  346. return
  347. if self.task_events[task_id].is_set():
  348. logger.info(f"Task {task_id} already cancelled")
  349. return
  350. # 更新子任务状态
  351. if task_conversation.status == TestTaskConversationsStatus.PENDING.value:
  352. self.update_task_conversations_status(
  353. task_conversation.id,
  354. TestTaskConversationsStatus.RUNNING.value
  355. )
  356. else:
  357. return
  358. try:
  359. # 获取对话数据
  360. conversation_data = self.dataset_service.get_conversation_data_by_id(
  361. task_conversation.conversation_id)
  362. user_profile_data = self.dataset_service.get_user_profile_data(
  363. conversation_data.user_id,
  364. conversation_data.version_date.replace("-", ""))
  365. user_profile = json.loads(user_profile_data['profile_data_v1'])
  366. avatar = user_profile_data['iconurl']
  367. staff_profile = self.dataset_service.get_staff_profile_data(
  368. conversation_data.staff_id).agent_profile
  369. conversations = self.dataset_service.get_chat_conversation_list_by_ids(
  370. json.loads(conversation_data.conversation),
  371. conversation_data.staff_id
  372. )
  373. conversations = sorted(conversations, key=lambda i: i['timestamp'], reverse=False)
  374. # 生成推送消息
  375. last_timestamp = int(conversations[-1]["timestamp"])
  376. match task.evaluate_type:
  377. case 0:
  378. send_timestamp = int(last_timestamp / 1000) + 10
  379. case 1:
  380. send_timestamp = int(last_timestamp / 1000) + 24 * 3600
  381. case _:
  382. raise ValueError("evaluate_type must be 0 or 1")
  383. send_time = datetime.fromtimestamp(send_timestamp).strftime('%Y-%m-%d %H:%M:%S')
  384. except Exception as e:
  385. logger.error(f"Subtask {task_conversation.id} failed: {str(e)}")
  386. self.update_task_conversations_status(
  387. task_conversation.id,
  388. TestTaskConversationsStatus.FAILED.value
  389. )
  390. return
  391. try:
  392. # 创建独立的agent实例(确保线程安全)
  393. agent = MultiModalChatAgent(
  394. model=agent_configuration.execution_model,
  395. system_prompt=agent_configuration.system_prompt,
  396. tools=json.loads(agent_configuration.tools)
  397. )
  398. message = agent._generate_message(
  399. context={
  400. "formatted_staff_profile": staff_profile,
  401. "nickname": user_profile['nickname'],
  402. "name": user_profile['name'],
  403. "avatar": avatar,
  404. "preferred_nickname": user_profile['preferred_nickname'],
  405. "gender": user_profile['gender'],
  406. "age": user_profile['age'],
  407. "region": user_profile['region'],
  408. "health_conditions": user_profile['health_conditions'],
  409. "medications": user_profile['medications'],
  410. "interests": user_profile['interests'],
  411. "current_datetime": send_time
  412. },
  413. dialogue_history=conversations,
  414. query_prompt_template=query_prompt_template
  415. )
  416. if not message:
  417. self.update_task_conversations_status(
  418. task_conversation.id,
  419. TestTaskConversationsStatus.MESSAGE_FAILED.value
  420. )
  421. return
  422. except Exception as e:
  423. logger.error(f"Subtask {task_conversation.id} failed: {str(e)}")
  424. self.update_task_conversations_status(
  425. task_conversation.id,
  426. TestTaskConversationsStatus.MESSAGE_FAILED.value
  427. )
  428. return
  429. try:
  430. param = {}
  431. param["dialogue_history"] = conversations
  432. param["message"] = message
  433. param["send_time"] = send_time
  434. param["agent_profile"] = json.loads(staff_profile)
  435. param["user_profile"] = user_profile
  436. score = evaluate_agent(param, task.evaluate_type)
  437. if not score:
  438. self.update_task_conversations_status(
  439. task_conversation.id,
  440. TestTaskConversationsStatus.SCORE_FAILED.value
  441. )
  442. return
  443. # 更新子任务结果
  444. self.update_task_conversations_res(
  445. task_conversation.id,
  446. TestTaskConversationsStatus.SUCCESS.value,
  447. json.dumps(conversations, ensure_ascii=False),
  448. json.dumps(message, ensure_ascii=False),
  449. json.dumps(score, ensure_ascii=False)
  450. )
  451. except Exception as e:
  452. logger.error(f"Subtask {task_conversation.id} failed: {str(e)}")
  453. self.update_task_conversations_status(
  454. task_conversation.id,
  455. TestTaskConversationsStatus.SCORE_FAILED.value
  456. )
  457. def _update_final_task_status(self, task_id):
  458. """更新任务的最终状态"""
  459. task_conversations = self.get_task_conversations(task_id)
  460. all_completed = all(
  461. conv.status in (TestTaskConversationsStatus.SUCCESS.value,
  462. TestTaskConversationsStatus.FAILED.value,
  463. TestTaskConversationsStatus.MESSAGE_FAILED.value,
  464. TestTaskConversationsStatus.SCORE_FAILED.value)
  465. for conv in task_conversations
  466. )
  467. if all_completed:
  468. self.update_task_status(task_id, TestTaskStatus.COMPLETED.value)
  469. logger.info(f"Task {task_id} completed")
  470. elif not any(
  471. conv.status in (TestTaskConversationsStatus.PENDING.value,
  472. TestTaskConversationsStatus.RUNNING.value)
  473. for conv in task_conversations
  474. ):
  475. current_status = self.get_task(task_id).status
  476. if current_status != TestTaskStatus.CANCELLED.value:
  477. new_status = TestTaskStatus.COMPLETED.value if all_completed else TestTaskStatus.CANCELLED.value
  478. self.update_task_status(task_id, new_status)
  479. def _cleanup_task_resources(self, task_id):
  480. """清理任务资源(线程安全)"""
  481. with self.task_locks[task_id]:
  482. if task_id in self.running_tasks:
  483. self.running_tasks.remove(task_id)
  484. if task_id in self.task_events:
  485. del self.task_events[task_id]
  486. if task_id in self.task_futures:
  487. del self.task_futures[task_id]
  488. def shutdown(self):
  489. """关闭执行器"""
  490. self.executor.shutdown(wait=False)
  491. logger.info("Task executor shutdown")