8 месяцев назад · 15c9c5e8d7
--- a/pqai_agent/agent_service.py
+++ b/pqai_agent/agent_service.py
@@ -240,7 +240,7 @@ class AgentService:
 
				             sys.exit(0)
			
 
				 
			
 
				     def _update_user_profile(self, user_id, user_profile, recent_dialogue: List[Dict]):
			
 
				-        profile_to_update = self.user_profile_extractor.extract_profile_info(user_profile, recent_dialogue)
			
 
				+        profile_to_update = self.user_profile_extractor.extract_profile_info_v2(user_profile, recent_dialogue)
			
 
				         if not profile_to_update:
			
 
				             logger.debug("user_id: {}, no profile info extracted".format(user_id))
			
 
				             return
			
--- a/pqai_agent/dialogue_manager.py
+++ b/pqai_agent/dialogue_manager.py
@@ -560,6 +560,7 @@ class DialogueManager:
 
				             "if_first_interaction": True if self.previous_state == DialogueState.INITIALIZED else False,
			
 
				             "if_active_greeting": False if user_message else True,
			
 
				             "formatted_staff_profile": prompt_utils.format_agent_profile(self.staff_profile),
			
 
				+            "formatted_user_profile": prompt_utils.format_user_profile(self.user_profile),
			
 
				             **self.user_profile,
			
 
				             **legacy_staff_profile
			
 
				         }
			
--- a/pqai_agent/prompt_templates.py
+++ b/pqai_agent/prompt_templates.py
@@ -211,6 +211,61 @@ USER_PROFILE_EXTRACT_PROMPT = """
 
				 请使用update_user_profile函数返回需要更新的信息，注意不要返回不需要更新的信息！
			
 
				 """
			
 
				 
			
 
				+USER_PROFILE_EXTRACT_PROMPT_V2 = """
			
 
				+请在已有的用户画像的基础上，仔细分析以下用户和客服的对话内容，完善用户的画像信息。
			
 
				+
			
 
				+# 对话历史格式
			
 
				+[用户][2025-05-29 22:06:14][文本] 内容...
			
 
				+[客服][2025-05-29 22:06:20][文本] 内容...
			
 
				+[用户][2025-05-29 22:06:33][文本] 内容...
			
 
				+## 特别说明
			
 
				+* 对话历史已通过[用户]/[客服]标签严格区分发言角色，除开头的角色标签外，其它均为对话的内容！
			
 
				+* 消息开头可能出现"丽丽："等冒号分隔结构，是对另一方的称呼，不是要将其视为对话发起人的身份标识！
			
 
				+
			
 
				+# 特征key定义及含义
			
 
				+- name: 姓名
			
 
				+- preferred_nickname: 用户希望对其的称呼
			
 
				+- gender: 性别
			
 
				+- age: 年龄
			
 
				+- region: 地区。用户常驻的地区，不是用户临时所在地
			
 
				+- health_conditions: 健康状况
			
 
				+- interests: 兴趣爱好
			
 
				+- interaction_frequency: 联系频率。每2天联系小于1次为low，每天联系1次为medium，未来均不再联系为stopped
			
 
				+- flexible_params: 动态特征
			
 
				+
			
 
				+# 当前已提取信息（可能为空或有错误）
			
 
				+{formatted_user_profile}
			
 
				+
			
 
				+# 对话历史
			
 
				+{dialogue_history}
			
 
				+
			
 
				+# 任务
			
 
				+在微信场景中，要与用户保持紧密沟通并提升互动质量，从历史沟通内容中系统性地提取极高置信度的用户信息
			
 
				+
			
 
				+# 要求
			
 
				+* 尽可能准确地识别用户的年龄、兴趣爱好、健康状况
			
 
				+* 关注用户生活、家庭等隐性信息
			
 
				+* 信息提取一定要有很高的准确性！如果无法确定具体信息，一定不要猜测！一定注意是用户自己的情况，而不是用户谈到的其它人的情况！
			
 
				+* 用户消息中出现的任何名称都视为对客服或第三方的称呼！除非用户明确使用类似"我叫"、"本名是"等自述句式，否则永远不要提取为姓名！
			
 
				+* 一定不要混淆用户和客服分别说的话！客服说的话只用于提供上下文，帮助理解对话语境！所有信息必须以用户说的为准！
			
 
				+* preferred_nickname提取需满足：用户明确使用"请叫我X"/"叫我X"/"称呼我X"等指令句式。排除用户对其他人的称呼。
			
 
				+* 一定不要把用户对客服的称呼当作preferred_nickname！一定不要把用户对客服的称呼当作preferred_nickname！
			
 
				+* 注意兴趣爱好的定义！兴趣爱好是为了乐趣或放松而进行的活动或消遣，必须是用户明确提到喜欢参与的活动，必须为动词或动名词。
			
 
				+* 兴趣爱好只保留最关键的5项。请合并相似的兴趣，不要保留多项相似的兴趣！注意兴趣爱好的定义！一定不要把用户短期的话题和需求当作兴趣爱好！
			
 
				+* 当前已提取的兴趣爱好并不一定准确，请判断当前兴趣爱好是否符合常理，如果不是一项活动或者根据对话历史判断它不是用户的兴趣爱好，请删除！
			
 
				+* 每个特征按照低/中/高区分，只保留高置信度特征
			
 
				+* 你需要自己提取对沟通有帮助的特征，放入flexible_params，key直接使用中文
			
 
				+* 除了flexible_params，其它key请严格遵循<特征key定义>中的要求，不要使用未定义的key！
			
 
				+
			
 
				+以JSON对象格式返回**需要更新**的信息，不要返回无需更新的信息！！如果无需更新任何信息，请返回{{}}，不要输出其它内容。示例输出：
			
 
				+{{
			
 
				+    "name": "张三",
			
 
				+    "flexible_params": {{
			
 
				+        "沟通特点": "使用四川方言"
			
 
				+    }}
			
 
				+}}
			
 
				+"""
			
 
				+
			
 
				 RESPONSE_TYPE_DETECT_PROMPT = """
			
 
				 # 角色设定
			
 
				 * 你是一位熟悉中老年用户交流习惯的智能客服，能够精准理解用户需求，提供专业、实用且有温度的建议。
			
--- a/pqai_agent/user_manager.py
+++ b/pqai_agent/user_manager.py
@@ -55,8 +55,6 @@ class UserManager(abc.ABC):
 
				             },
			
 
				             "interaction_style": "standard",  # standard, verbose, concise
			
 
				             "interaction_frequency": "medium",  # low, medium, high
			
 
				-            "last_topics": [],
			
 
				-            "created_at": int(time.time() * 1000),
			
 
				             "human_intervention_history": []
			
 
				         }
			
 
				         for key, value in kwargs.items():
			
--- a/pqai_agent/user_profile_extractor.py
+++ b/pqai_agent/user_profile_extractor.py
@@ -5,14 +5,28 @@
 
				 import json
			
 
				 from typing import Dict, Optional, List
			
 
				 
			
 
				+from sqlalchemy.testing.plugin.plugin_base import logging
			
 
				+
			
 
				 from pqai_agent import chat_service
			
 
				 from pqai_agent import configs
			
 
				-from pqai_agent.prompt_templates import USER_PROFILE_EXTRACT_PROMPT
			
 
				+from pqai_agent.prompt_templates import USER_PROFILE_EXTRACT_PROMPT, USER_PROFILE_EXTRACT_PROMPT_V2
			
 
				 from openai import OpenAI
			
 
				 from pqai_agent.logging_service import logger
			
 
				+from pqai_agent.utils import prompt_utils
			
 
				 
			
 
				 
			
 
				 class UserProfileExtractor:
			
 
				+    FIELDS = [
			
 
				+        "name",
			
 
				+        "preferred_nickname",
			
 
				+        "gender",
			
 
				+        "age",
			
 
				+        "region",
			
 
				+        "interests",
			
 
				+        "health_conditions",
			
 
				+        "interaction_frequency",
			
 
				+        "flexible_params"
			
 
				+    ]
			
 
				     def __init__(self):
			
 
				         self.llm_client = OpenAI(
			
 
				             api_key=chat_service.VOLCENGINE_API_TOKEN,
			
@@ -73,13 +87,14 @@ class UserProfileExtractor:
 
				             }
			
 
				         }
			
 
				 
			
 
    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict], prompt_template = USER_PROFILE_EXTRACT_PROMPT) -> str:
        """
        Build the prompt text used for profile extraction.

        The template is filled with every key of ``user_profile`` plus two
        derived entries: ``dialogue_history`` (the output of
        ``compose_dialogue``) and ``formatted_user_profile`` (the output of
        ``prompt_utils.format_user_profile``). The derived entries win if the
        profile happens to contain a key with the same name.

        :param user_profile: profile dict; its keys become template arguments
        :param dialogue_history: messages passed to ``compose_dialogue``
        :param prompt_template: ``str.format`` template to fill
        :return: the formatted prompt
        """
        template_args = {
            **user_profile,
            'dialogue_history': self.compose_dialogue(dialogue_history),
            'formatted_user_profile': prompt_utils.format_user_profile(user_profile),
        }
        return prompt_template.format(**template_args)
			
 
				 
			
 
				     @staticmethod
			
 
				     def compose_dialogue(dialogue: List[Dict]) -> str:
			
@@ -130,15 +145,59 @@ class UserProfileExtractor:
 
				             logger.error(f"用户画像提取出错: {e}")
			
 
				             return None
			
 
				 
			
 
    def extract_profile_info_v2(self, user_profile: Dict, dialogue_history: List[Dict]) -> Optional[Dict]:
        """
        Extract user-profile updates by asking the LLM for a plain JSON object.

        The prompt is built from ``USER_PROFILE_EXTRACT_PROMPT_V2`` via
        ``generate_extraction_prompt`` and sent through ``self.llm_client``;
        the reply text is then parsed with ``json.loads``.

        :param user_profile: the profile known so far, rendered into the prompt
        :param dialogue_history: messages rendered into the prompt
        :return: the decoded JSON object with the fields to update, or None
            when LLM calls are disabled by the ``disable_llm_api_call`` debug
            flag, the reply is not a JSON object, or any error occurs
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None

        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history, USER_PROFILE_EXTRACT_PROMPT_V2)
            # Log through the logger instead of printing to stdout.
            logger.debug("profile extraction prompt: {}".format(prompt))
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                temperature=0
            )
            # The reply content may be missing; treat that as an empty reply.
            raw_content = response.choices[0].message.content or ""
            # Strip the "```json" opener BEFORE the bare "```" fence: doing it
            # the other way round removes the backticks first and leaves a
            # stray "json" prefix that breaks decoding.
            json_data = raw_content.replace("```json", "").replace("```", "").strip()
            try:
                profile_info = json.loads(json_data)
            except json.JSONDecodeError as e:
                logger.error(f"Error in JSON decode: {e}, original input: {json_data}")
                return None
            # Valid JSON that is not an object (list, string, number, null)
            # cannot be merged as profile fields.
            if not isinstance(profile_info, dict):
                logger.error(f"Unexpected JSON type in profile extraction, original input: {json_data}")
                return None
            return profile_info

        except Exception as e:
            logger.error(f"用户画像提取出错: {e}")
            return None
			
 
				+
			
 
				     def merge_profile_info(self, existing_profile: Dict, new_info: Dict) -> Dict:
			
 
				         """
			
 
				         合并新提取的用户信息到现有资料
			
 
				         """
			
 
				         merged_profile = existing_profile.copy()
			
 
				-        merged_profile.update(new_info)
			
 
				+        for field in new_info:
			
 
				+            if field in self.FIELDS:
			
 
				+                merged_profile[field] = new_info[field]
			
 
				+            else:
			
 
				+                logger.warning(f"Unknown field in new profile: {field}")
			
 
				         return merged_profile
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				+    from pqai_agent import configs
			
 
				+    from pqai_agent import logging_service
			
 
				+    logging_service.setup_root_logger()
			
 
				+    config = configs.get()
			
 
				+    config['debug_flags']['disable_llm_api_call'] = False
			
 
				     extractor = UserProfileExtractor()
			
 
				     current_profile = {
			
 
				         'name': '',
			
@@ -152,11 +211,11 @@ if __name__ == '__main__':
 
				         'interaction_frequency': 'medium'
			
 
				     }
			
 
				     messages= [
			
 
				-        {'role': 'user', 'content': "没有任何问题放心，不会骚扰你了，再见"}
			
 
				+        {'role': 'user', 'content': "没有任何问题放心，以后不要再发了，再见"}
			
 
				     ]
			
 
				 
			
 
				-    resp = extractor.extract_profile_info(current_profile, messages)
			
 
				-    print(resp)
			
 
				+    # resp = extractor.extract_profile_info_v2(current_profile, messages)
			
 
				+    # logger.warning(resp)
			
 
				     message = "好的，孩子，我是老李头，今年68啦，住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋，偶尔也跟年轻人学学新鲜事儿。\n" \
			
 
				               "你叫我李叔就行，有啥事儿咱们慢慢聊啊\n" \
			
 
				               "哎，今儿个天气不错啊，我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极，我也跟着比划了两下，这老胳膊老腿的，原来老不舒服，活动活动舒坦多了!\n" \
			
@@ -165,9 +224,10 @@ if __name__ == '__main__':
 
				     messages = []
			
 
				     for line in message.split("\n"):
			
 
				         messages.append({'role': 'user', 'content': line})
			
 
				-    resp = extractor.extract_profile_info(current_profile, messages)
			
 
				-    print(resp)
			
 
				-    print(extractor.merge_profile_info(current_profile, resp))
			
 
				+    resp = extractor.extract_profile_info_v2(current_profile, messages)
			
 
				+    logger.warning(resp)
			
 
				+    merged_profile = extractor.merge_profile_info(current_profile, resp)
			
 
				+    logger.warning(merged_profile)
			
 
				     current_profile = {
			
 
				         'name': '李老头',
			
 
				         'preferred_nickname': '李叔',
			
@@ -179,6 +239,6 @@ if __name__ == '__main__':
 
				         'interests': ['养生', '下象棋'],
			
 
				         'interaction_frequency': 'medium'
			
 
				     }
			
 
				-    resp = extractor.extract_profile_info(current_profile, messages)
			
 
				-    print(resp)
			
 
				-    print(extractor.merge_profile_info(current_profile, resp))
			
 
				+    resp = extractor.extract_profile_info_v2(merged_profile, messages)
			
 
				+    logger.warning(resp)
			
 
				+    logger.warning(extractor.merge_profile_info(current_profile, resp))
			
--- a/pqai_agent/utils/prompt_utils.py
+++ b/pqai_agent/utils/prompt_utils.py
@@ -39,21 +39,26 @@ def format_user_profile(profile: Dict) -> str:
 
				     """
			
 
				     fields = [
			
 
				         ('nickname', '微信昵称'),
			
 
				+        ('preferred_nickname', '希望对其的称呼'),
			
 
				         ('name', '姓名'),
			
 
				         ('avatar', '头像'),
			
 
				-        ('preferred_nickname', '偏好的称呼'),
			
 
				+        ('gender', '性别'),
			
 
				         ('age', '年龄'),
			
 
				         ('region', '地区'),
			
 
				         ('health_conditions', '健康状况'),
			
 
				-        ('medications', '用药信息'),
			
 
				-        ('interests', '兴趣爱好')
			
 
				+        ('interests', '兴趣爱好'),
			
 
				+        ('interaction_frequency', '联系频率'),
			
 
				+        ('flexible_params', '动态特征'),
			
 
				     ]
			
 
				     strings_to_join = []
			
 
				     for field in fields:
			
 
				-        if not profile.get(field[0], None):
			
 
				+        value = profile.get(field[0], None)
			
 
				+        if not value:
			
 
				             continue
			
 
				-        if isinstance(profile[field[0]], list):
			
 
				-            value = ','.join(profile[field[0]])
			
 
				+        if isinstance(value, list):
			
 
				+            value = '，'.join(value)
			
 
				+        elif isinstance(value, dict):
			
 
				+            value = '；'.join(f"{k}: {v}" for k, v in value.items())
			
 
				         else:
			
 
				             value = profile[field[0]]
			
 
				         cur_string = f"- {field[1]}：{value}"