user_profile_extractor.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim:fenc=utf-8
  4. import json
  5. from typing import Dict, Any, Optional
  6. from datetime import datetime
  7. from openai import OpenAI
  8. import logging
  9. import global_flags
  10. class UserProfileExtractor:
  11. def __init__(self):
  12. self.llm_client = OpenAI(
  13. api_key='5e275c38-44fd-415f-abcf-4b59f6377f72',
  14. base_url="https://ark.cn-beijing.volces.com/api/v3"
  15. )
  16. self.model_name = 'ep-20250307150409-4blz9'
  17. def get_extraction_function(self) -> Dict:
  18. """
  19. 定义用于用户画像信息提取的Function Calling函数
  20. """
  21. return {
  22. "type": "function",
  23. "function": {
  24. "name": "update_user_profile",
  25. "description": "从用户对话中提取并更新用户的个人信息",
  26. "parameters": {
  27. "type": "object",
  28. "properties": {
  29. "name": {
  30. "type": "string",
  31. "description": "用户的姓名,如果能够准确识别"
  32. },
  33. "preferred_nickname": {
  34. "type": "string",
  35. "description": "用户希望对其的称呼,如果能够准确识别"
  36. },
  37. "age": {
  38. "type": "integer",
  39. "description": "用户的年龄,如果能够准确识别"
  40. },
  41. "region": {
  42. "type": "string",
  43. "description": "用户所在地区"
  44. },
  45. "interests": {
  46. "type": "array",
  47. "items": {"type": "string"},
  48. "description": "用户提到的自己的兴趣爱好"
  49. },
  50. "health_conditions": {
  51. "type": "array",
  52. "items": {"type": "string"},
  53. "description": "用户提及的健康状况"
  54. }
  55. },
  56. "required": []
  57. }
  58. }
  59. }
  60. def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: str) -> str:
  61. """
  62. 生成用于信息提取的系统提示词
  63. """
  64. context = user_profile.copy()
  65. context['dialogue_history'] = dialogue_history
  66. return """
  67. 请在已有的用户画像的基础上,仔细分析以下对话内容,完善用户的画像信息。
  68. 已知信息(可能为空):
  69. - 姓名:{name}
  70. - 希望的称呼:{preferred_nickname}
  71. - 年龄:{age}
  72. - 地区:{region}
  73. - 健康状况:{health_conditions}
  74. - 兴趣爱好:{interests}
  75. 对话历史:
  76. {dialogue_history}
  77. 提取要求:
  78. 1. 尽可能准确地识别用户的年龄、兴趣爱好、健康状况
  79. 2. 关注用户生活、家庭等隐性信息
  80. 3. 信息提取需要有较高的置信度,兴趣爱好只保留用户明确喜欢且最关键的5项
  81. 4. 如果无法确定具体信息,请不要猜测
  82. 请使用update_user_profile函数返回需要更新的信息,注意不要返回无需更新的信息。
  83. """.format(**context)
  84. def extract_profile_info(self, user_profile, dialogue_history: str) -> Optional[Dict]:
  85. """
  86. 使用Function Calling提取用户画像信息
  87. """
  88. if global_flags.DISABLE_LLM_API_CALL:
  89. return None
  90. try:
  91. logging.debug("try to extract profile from message: {}".format(dialogue_history))
  92. response = self.llm_client.chat.completions.create(
  93. model=self.model_name,
  94. messages=[
  95. {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
  96. {"role": "user", "content": self.generate_extraction_prompt(user_profile, dialogue_history)}
  97. ],
  98. tools=[self.get_extraction_function()],
  99. temperature=0
  100. )
  101. # 解析Function Call的参数
  102. tool_calls = response.choices[0].message.tool_calls
  103. logging.debug(response)
  104. if tool_calls:
  105. function_call = tool_calls[0]
  106. if function_call.function.name == 'update_user_profile':
  107. try:
  108. profile_info = json.loads(function_call.function.arguments)
  109. return {k: v for k, v in profile_info.items() if v}
  110. except json.JSONDecodeError:
  111. logging.error("无法解析提取的用户信息")
  112. return None
  113. except Exception as e:
  114. logging.error(f"用户画像提取出错: {e}")
  115. return None
  116. def merge_profile_info(self, existing_profile: Dict, new_info: Dict) -> Dict:
  117. """
  118. 合并新提取的用户信息到现有资料
  119. """
  120. merged_profile = existing_profile.copy()
  121. merged_profile.update(new_info)
  122. return merged_profile
  123. if __name__ == '__main__':
  124. extractor = UserProfileExtractor()
  125. current_profile = {
  126. 'name': '',
  127. 'preferred_nickname': '',
  128. 'age': 0,
  129. 'region': '',
  130. 'health_conditions': [],
  131. 'medications': [],
  132. 'interests': []
  133. }
  134. message = "好的,孩子,我是老李头,今年68啦,住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋,偶尔也跟年轻人学学新鲜事儿。\n" \
  135. "你叫我李叔就行,有啥事儿咱们慢慢聊啊\n" \
  136. "哎,今儿个天气不错啊,我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极,我也跟着比划了两下,这老胳膊老腿的,原来老不舒服,活动活动舒坦多了!\n" \
  137. "你吃饭了没?我们这儿中午吃的打卤面,老伴儿做的,香得很!这人老了就爱念叨些家长里短的,你可别嫌我啰嗦啊。\n" \
  138. "对了,最近我孙子教我发语音,比打字方便多啦!就是有时候一激动,说话声音太大,把手机都给震得嗡嗡响\n"
  139. resp = extractor.extract_profile_info(current_profile, message)
  140. print(resp)
  141. print(extractor.merge_profile_info(current_profile, resp))
  142. current_profile = {
  143. 'name': '李老头',
  144. 'preferred_nickname': '李叔',
  145. 'age': 68,
  146. 'region': '北京市海淀区',
  147. 'health_conditions': [],
  148. 'medications': [],
  149. 'interests': ['养生', '下象棋']
  150. }
  151. resp = extractor.extract_profile_info(current_profile, message)
  152. print(resp)
  153. print(extractor.merge_profile_info(current_profile, resp))