push_service.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. import json
  2. import time
  3. import traceback
  4. import uuid
  5. from datetime import datetime
  6. from enum import Enum
  7. from concurrent.futures import ThreadPoolExecutor
  8. from threading import Thread
  9. from typing import Optional, Dict, List
  10. import rocketmq
  11. from rocketmq import ClientConfiguration, Credentials, SimpleConsumer, FilterExpression
  12. from pqai_agent import configs
  13. from pqai_agent.agents.message_push_agent import MessagePushAgent, DummyMessagePushAgent
  14. from pqai_agent.configs import apollo_config
  15. from pqai_agent.data_models.agent_push_record import AgentPushRecord
  16. from pqai_agent.logging_service import logger
  17. from pqai_agent.mq_message import MessageType
  18. from pqai_agent.toolkit import get_tools
  19. from pqai_agent.utils.agent_abtest_utils import get_agent_abtest_config
  20. class TaskType(Enum):
  21. GENERATE = "generate"
  22. SEND = "send"
  23. def generate_task_rmq_message(topic: str, staff_id: str, user_id: str, task_type: TaskType, content: Optional[str] = None) -> rocketmq.Message:
  24. msg = rocketmq.Message()
  25. msg.topic = topic
  26. msg.body = json.dumps({
  27. 'staff_id': staff_id,
  28. 'user_id': user_id,
  29. 'task_type': task_type.value,
  30. # NOTE:通过传入JSON支持多模态消息
  31. 'content': content or '',
  32. 'timestamp': int(time.time() * 1000),
  33. }, ensure_ascii=False).encode('utf-8')
  34. msg.tag = task_type.value
  35. return msg
  36. class PushScanThread:
  37. # PushScanThread实际可以是AgentService的一个函数,从AgentService中独立的主要考虑因素为Push后续可能有拆分和扩展
  38. def __init__(self, staff_id: str, agent_service: 'AgentService', mq_topic: str, mq_producer: rocketmq.Producer):
  39. self.staff_id = staff_id
  40. # 需要大量使用AgentService内部的成员
  41. self.service = agent_service
  42. self.rmq_topic = mq_topic
  43. self.rmq_producer = mq_producer
  44. def run(self):
  45. white_list_tags = set(apollo_config.get_json_value('agent_initiate_whitelist_tags', []))
  46. first_initiate_tags = set(apollo_config.get_json_value('agent_first_initiate_whitelist_tags', []))
  47. # 合并白名单,减少配置成本
  48. white_list_tags.update(first_initiate_tags)
  49. for staff_user in self.service.user_relation_manager.list_staff_users(staff_id=self.staff_id):
  50. staff_id = staff_user['staff_id']
  51. user_id = staff_user['user_id']
  52. agent = self.service.get_agent_instance(staff_id, user_id)
  53. should_initiate = agent.should_initiate_conversation()
  54. user_tags = self.service.user_relation_manager.get_user_tags(user_id)
  55. if configs.get_env() != 'dev' and not white_list_tags.intersection(user_tags):
  56. should_initiate = False
  57. if should_initiate:
  58. logger.info(f"user[{user_id}], tags{user_tags}: generate a generation task for conversation initiation")
  59. rmq_msg = generate_task_rmq_message(self.rmq_topic, staff_id, user_id, TaskType.GENERATE)
  60. self.rmq_producer.send(rmq_msg)
  61. else:
  62. logger.debug(f"user[{user_id}], do not initiate conversation")
  63. class PushTaskWorkerPool:
  64. def __init__(self, agent_service: 'AgentService', mq_topic: str,
  65. mq_consumer: rocketmq.SimpleConsumer, mq_producer: rocketmq.Producer):
  66. self.agent_service = agent_service
  67. max_workers = configs.get()['system'].get('push_task_workers', 5)
  68. self.generate_executor = ThreadPoolExecutor(max_workers=max_workers)
  69. self.send_executors = {}
  70. self.rmq_topic = mq_topic
  71. self.consumer = mq_consumer
  72. self.producer = mq_producer
  73. self.loop_thread = None
  74. self.is_generator_running = True
  75. self.generate_send_done = False # set by wait_to_finish
  76. self.no_more_generate_task = False # set by self
  77. def start(self):
  78. self.loop_thread = Thread(target=self.process_push_tasks)
  79. self.loop_thread.start()
  80. def process_push_tasks(self):
  81. # RMQ consumer疑似有bug,创建后立即消费可能报NPE
  82. time.sleep(1)
  83. while True:
  84. msgs = self.consumer.receive(1, 300)
  85. if not msgs:
  86. # 没有生成任务在执行且没有消息,才可退出
  87. if self.generate_send_done:
  88. if not self.no_more_generate_task:
  89. logger.debug("no message received, there should be no more generate task")
  90. self.no_more_generate_task = True
  91. continue
  92. else:
  93. if self.is_generator_running:
  94. logger.debug("Waiting for generator threads to finish")
  95. continue
  96. else:
  97. break
  98. else:
  99. continue
  100. msg = msgs[0]
  101. task = json.loads(msg.body.decode('utf-8'))
  102. msg_time = datetime.fromtimestamp(task['timestamp'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
  103. logger.debug(f"recv message:{msg_time} - {task}")
  104. if task['task_type'] == TaskType.GENERATE.value:
  105. self.generate_executor.submit(self.handle_generate_task, task, msg)
  106. elif task['task_type'] == TaskType.SEND.value:
  107. staff_id = task['staff_id']
  108. if staff_id not in self.send_executors:
  109. self.send_executors[staff_id] = ThreadPoolExecutor(max_workers=1)
  110. self.send_executors[staff_id].submit(self.handle_send_task, task, msg)
  111. else:
  112. logger.error(f"Unknown task type: {task['task_type']}")
  113. self.consumer.ack(msg)
  114. logger.info("PushGenerateWorkerPool stopped")
  115. def wait_to_finish(self):
  116. self.generate_send_done = True
  117. while not self.no_more_generate_task:
  118. #FIXME(zhoutian): condition variable should be used to replace time sleep
  119. time.sleep(1)
  120. self.generate_executor.shutdown(wait=True)
  121. self.is_generator_running = False
  122. self.loop_thread.join()
  123. def handle_send_task(self, task: Dict, msg: rocketmq.Message):
  124. try:
  125. staff_id = task['staff_id']
  126. user_id = task['user_id']
  127. agent = self.agent_service.get_agent_instance(staff_id, user_id)
  128. # 二次校验是否需要发送
  129. if not agent.should_initiate_conversation():
  130. logger.debug(f"user[{user_id}], do not initiate conversation")
  131. self.consumer.ack(msg)
  132. return
  133. contents: List[Dict] = json.loads(task['content'])
  134. if not contents:
  135. logger.debug(f"staff[{staff_id}], user[{user_id}]: empty content, do not send")
  136. self.consumer.ack(msg)
  137. return
  138. recent_dialogue = agent.dialogue_history[-10:]
  139. agent_voice_whitelist = set(apollo_config.get_json_value("agent_voice_whitelist", []))
  140. messages_to_send = []
  141. current_ts = int(time.time())
  142. for item in contents:
  143. item["timestamp"] = current_ts * 1000
  144. if item["type"] == "text":
  145. if staff_id not in agent_voice_whitelist:
  146. message_type = MessageType.TEXT
  147. else:
  148. message_type = self.agent_service.response_type_detector.detect_type(
  149. recent_dialogue, item, enable_random=True)
  150. response = agent.generate_response(item["content"])
  151. if response:
  152. messages_to_send.append({'type': message_type, 'content': response})
  153. else:
  154. message_type = MessageType.from_str(item["type"])
  155. response = agent.generate_multimodal_response(item)
  156. if response:
  157. item["type"] = message_type
  158. messages_to_send.append(item)
  159. with self.agent_service.agent_db_session_maker() as session:
  160. msg_list = [{"type": msg["type"].value, "content": msg["content"]} for msg in messages_to_send]
  161. record = AgentPushRecord(staff_id=staff_id, user_id=user_id,
  162. content=json.dumps(msg_list, ensure_ascii=False),
  163. timestamp=current_ts)
  164. session.add(record)
  165. session.commit()
  166. if messages_to_send:
  167. for response in messages_to_send:
  168. self.agent_service.send_multimodal_response(staff_id, user_id, response, skip_check=True)
  169. agent.update_last_active_interaction_time(current_ts)
  170. else:
  171. logger.debug(f"staff[{staff_id}], user[{user_id}]: generate empty response")
  172. self.consumer.ack(msg)
  173. except Exception as e:
  174. fmt_exc = traceback.format_exc()
  175. logger.error(f"Error processing message sending: {e}, {fmt_exc}")
  176. self.consumer.ack(msg)
  177. def handle_generate_task(self, task: Dict, msg: rocketmq.Message):
  178. try:
  179. staff_id = task['staff_id']
  180. user_id = task['user_id']
  181. main_agent = self.agent_service.get_agent_instance(staff_id, user_id)
  182. agent_config = get_agent_abtest_config('push', user_id,
  183. self.agent_service.service_module_manager,
  184. self.agent_service.agent_config_manager)
  185. if agent_config:
  186. push_agent = MessagePushAgent(model=agent_config.execution_model,
  187. system_prompt=agent_config.system_prompt,
  188. tools=get_tools(agent_config.tools))
  189. query_prompt_template = agent_config.task_prompt
  190. else:
  191. push_agent = MessagePushAgent()
  192. query_prompt_template = None
  193. message_to_user = push_agent.generate_message(
  194. context=main_agent.get_prompt_context(None),
  195. dialogue_history=self.agent_service.history_dialogue_db.get_dialogue_history_backward(
  196. staff_id, user_id, main_agent.last_interaction_time_ms, limit=100
  197. ),
  198. query_prompt_template=query_prompt_template
  199. )
  200. if message_to_user:
  201. rmq_message = generate_task_rmq_message(
  202. self.rmq_topic, staff_id, user_id, TaskType.SEND, json.dumps(message_to_user))
  203. self.producer.send(rmq_message)
  204. else:
  205. logger.info(f"staff[{staff_id}], user[{user_id}]: no push message generated")
  206. self.consumer.ack(msg)
  207. except Exception as e:
  208. fmt_exc = traceback.format_exc()
  209. logger.error(f"Error processing message generation: {e}, {fmt_exc}")
  210. # FIXME: 是否需要ACK
  211. self.consumer.ack(msg)