Преглед на файлове

Merge branch 'feature/202506-profile-extract-v2' of Server/AgentCoreService into master

fengzhoutian преди 2 дни
родител
ревизия
78c7657d3c

+ 1 - 1
pqai_agent/agent_service.py

@@ -240,7 +240,7 @@ class AgentService:
             sys.exit(0)
 
     def _update_user_profile(self, user_id, user_profile, recent_dialogue: List[Dict]):
-        profile_to_update = self.user_profile_extractor.extract_profile_info(user_profile, recent_dialogue)
+        profile_to_update = self.user_profile_extractor.extract_profile_info_v2(user_profile, recent_dialogue)
         if not profile_to_update:
             logger.debug("user_id: {}, no profile info extracted".format(user_id))
             return

+ 1 - 0
pqai_agent/dialogue_manager.py

@@ -560,6 +560,7 @@ class DialogueManager:
             "if_first_interaction": True if self.previous_state == DialogueState.INITIALIZED else False,
             "if_active_greeting": False if user_message else True,
             "formatted_staff_profile": prompt_utils.format_agent_profile(self.staff_profile),
+            "formatted_user_profile": prompt_utils.format_user_profile(self.user_profile),
             **self.user_profile,
             **legacy_staff_profile
         }

+ 55 - 0
pqai_agent/prompt_templates.py

@@ -211,6 +211,61 @@ USER_PROFILE_EXTRACT_PROMPT = """
 请使用update_user_profile函数返回需要更新的信息,注意不要返回不需要更新的信息!
 """
 
+USER_PROFILE_EXTRACT_PROMPT_V2 = """
+请在已有的用户画像的基础上,仔细分析以下用户和客服的对话内容,完善用户的画像信息。
+
+# 对话历史格式
+[用户][2025-05-29 22:06:14][文本] 内容...
+[客服][2025-05-29 22:06:20][文本] 内容...
+[用户][2025-05-29 22:06:33][文本] 内容...
+## 特别说明
+* 对话历史已通过[用户]/[客服]标签严格区分发言角色,除开头的角色标签外,其它均为对话的内容!
+* 消息开头可能出现"丽丽:"等冒号分隔结构,是对另一方的称呼,不是要将其视为对话发起人的身份标识!
+
+# 特征key定义及含义
+- name: 姓名
+- preferred_nickname: 用户希望对其的称呼
+- gender: 性别
+- age: 年龄
+- region: 地区。用户常驻的地区,不是用户临时所在地
+- health_conditions: 健康状况
+- interests: 兴趣爱好
+- interaction_frequency: 联系频率。每2天联系小于1次为low,每天联系1次为medium,未来均不再联系为stopped
+- flexible_params: 动态特征
+
+# 当前已提取信息(可能为空或有错误)
+{formatted_user_profile}
+
+# 对话历史
+{dialogue_history}
+
+# 任务
+在微信场景中,要与用户保持紧密沟通并提升互动质量,从历史沟通内容中系统性地提取极高置信度的用户信息
+
+# 要求
+* 尽可能准确地识别用户的年龄、兴趣爱好、健康状况
+* 关注用户生活、家庭等隐性信息
+* 信息提取一定要有很高的准确性!如果无法确定具体信息,一定不要猜测!一定注意是用户自己的情况,而不是用户谈到的其它人的情况!
+* 用户消息中出现的任何名称都视为对客服或第三方的称呼!除非用户明确使用类似"我叫"、"本名是"等自述句式,否则永远不要提取为姓名!
+* 一定不要混淆用户和客服分别说的话!客服说的话只用于提供上下文,帮助理解对话语境!所有信息必须以用户说的为准!
+* preferred_nickname提取需满足:用户明确使用"请叫我X"/"叫我X"/"称呼我X"等指令句式。排除用户对其他人的称呼。
+* 一定不要把用户对客服的称呼当作preferred_nickname!一定不要把用户对客服的称呼当作preferred_nickname!
+* 注意兴趣爱好的定义!兴趣爱好是为了乐趣或放松而进行的活动或消遣,必须是用户明确提到喜欢参与的活动,必须为动词或动名词。
+* 兴趣爱好只保留最关键的5项。请合并相似的兴趣,不要保留多项相似的兴趣!注意兴趣爱好的定义!一定不要把用户短期的话题和需求当作兴趣爱好!
+* 当前已提取的兴趣爱好并不一定准确,请判断当前兴趣爱好是否符合常理,如果不是一项活动或者根据对话历史判断它不是用户的兴趣爱好,请删除!
+* 每个特征按照低/中/高区分,只保留高置信度特征
+* 你需要自己提取对沟通有帮助的特征,放入flexible_params,key直接使用中文
+* 除了flexible_params,其它key请严格遵循<特征key定义>中的要求,不要使用未定义的key!
+
+以JSON对象格式返回**需要更新**的信息,不要返回无需更新的信息!!如果无需更新任何信息,请返回{{}},不要输出其它内容。示例输出:
+{{
+    "name": "张三",
+    "flexible_params": {{
+        "沟通特点": "使用四川方言"
+    }}
+}}
+"""
+
 RESPONSE_TYPE_DETECT_PROMPT = """
 # 角色设定
 * 你是一位熟悉中老年用户交流习惯的智能客服,能够精准理解用户需求,提供专业、实用且有温度的建议。

+ 0 - 2
pqai_agent/user_manager.py

@@ -55,8 +55,6 @@ class UserManager(abc.ABC):
             },
             "interaction_style": "standard",  # standard, verbose, concise
             "interaction_frequency": "medium",  # low, medium, high
-            "last_topics": [],
-            "created_at": int(time.time() * 1000),
             "human_intervention_history": []
         }
         for key, value in kwargs.items():

+ 85 - 21
pqai_agent/user_profile_extractor.py

@@ -5,20 +5,36 @@
 import json
 from typing import Dict, Optional, List
 
-from pqai_agent import chat_service
-from pqai_agent import configs
-from pqai_agent.prompt_templates import USER_PROFILE_EXTRACT_PROMPT
+from pqai_agent import chat_service, configs
+from pqai_agent.prompt_templates import USER_PROFILE_EXTRACT_PROMPT, USER_PROFILE_EXTRACT_PROMPT_V2
 from openai import OpenAI
 from pqai_agent.logging_service import logger
+from pqai_agent.utils import prompt_utils
 
 
 class UserProfileExtractor:
-    def __init__(self):
-        self.llm_client = OpenAI(
-            api_key=chat_service.VOLCENGINE_API_TOKEN,
-            base_url=chat_service.VOLCENGINE_BASE_URL
-        )
-        self.model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3
+    FIELDS = [
+        "name",
+        "preferred_nickname",
+        "gender",
+        "age",
+        "region",
+        "interests",
+        "health_conditions",
+        "interaction_frequency",
+        "flexible_params"
+    ]
+    def __init__(self, model_name=None, llm_client=None):
+        if not llm_client:
+            self.llm_client = OpenAI(
+                api_key=chat_service.VOLCENGINE_API_TOKEN,
+                base_url=chat_service.VOLCENGINE_BASE_URL
+            )
+        else:
+            self.llm_client = llm_client
+        if not model_name:
+            model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3
+        self.model_name = model_name
 
     @staticmethod
     def get_extraction_function() -> Dict:
@@ -73,13 +89,14 @@ class UserProfileExtractor:
             }
         }
 
-    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict]) -> str:
+    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict], prompt_template = USER_PROFILE_EXTRACT_PROMPT) -> str:
         """
         生成用于信息提取的系统提示词
         """
         context = user_profile.copy()
         context['dialogue_history'] = self.compose_dialogue(dialogue_history)
-        return USER_PROFILE_EXTRACT_PROMPT.format(**context)
+        context['formatted_user_profile'] = prompt_utils.format_user_profile(user_profile)
+        return prompt_template.format(**context)
 
     @staticmethod
     def compose_dialogue(dialogue: List[Dict]) -> str:
@@ -130,15 +147,61 @@ class UserProfileExtractor:
             logger.error(f"用户画像提取出错: {e}")
             return None
 
+    def extract_profile_info_v2(self, user_profile: Dict, dialogue_history: List[Dict], prompt_template: Optional[str] = None) -> Optional[Dict]:
+        """
+        Extract user-profile updates via a plain-JSON LLM response (v2, no function calling).
+        :param user_profile: current profile dict, used as context for the extraction prompt
+        :param dialogue_history: recent dialogue messages to mine for profile facts
+        :param prompt_template: optional custom prompt; defaults to USER_PROFILE_EXTRACT_PROMPT_V2
+        :return: dict of fields to update, or None on failure / when LLM calls are disabled
+        """
+        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
+            return None
+
+        try:
+            logger.debug("try to extract profile from message: {}".format(dialogue_history))
+            prompt_template = prompt_template or USER_PROFILE_EXTRACT_PROMPT_V2
+            prompt = self.generate_extraction_prompt(user_profile, dialogue_history, prompt_template)
+            logger.debug(prompt)  # FIX: was a bare print(); keep the prompt out of stdout
+            response = self.llm_client.chat.completions.create(
+                model=self.model_name,
+                messages=[
+                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0
+            )
+            json_data = response.choices[0].message.content \
+                .replace("```json", "").replace("```", "").strip()  # FIX: strip "```json" first, else a stray "json" prefix survives and json.loads fails
+            try:
+                profile_info = json.loads(json_data)
+            except json.JSONDecodeError as e:
+                logger.error(f"Error in JSON decode: {e}, original input: {json_data}")
+                return None
+            return profile_info
+
+        except Exception as e:
+            logger.error(f"用户画像提取出错: {e}")
+            return None
+
     def merge_profile_info(self, existing_profile: Dict, new_info: Dict) -> Dict:
         """
         合并新提取的用户信息到现有资料
         """
         merged_profile = existing_profile.copy()
-        merged_profile.update(new_info)
+        for field in new_info:
+            if field in self.FIELDS:
+                merged_profile[field] = new_info[field]
+            else:
+                logger.warning(f"Unknown field in new profile: {field}")
         return merged_profile
 
 if __name__ == '__main__':
+    from pqai_agent import configs
+    from pqai_agent import logging_service
+    logging_service.setup_root_logger()
+    config = configs.get()
+    config['debug_flags']['disable_llm_api_call'] = False
     extractor = UserProfileExtractor()
     current_profile = {
         'name': '',
@@ -152,11 +215,11 @@ if __name__ == '__main__':
         'interaction_frequency': 'medium'
     }
     messages= [
-        {'role': 'user', 'content': "没有任何问题放心,不会骚扰你了,再见"}
+        {'role': 'user', 'content': "没有任何问题放心,以后不要再发了,再见"}
     ]
 
-    resp = extractor.extract_profile_info(current_profile, messages)
-    print(resp)
+    # resp = extractor.extract_profile_info_v2(current_profile, messages)
+    # logger.warning(resp)
     message = "好的,孩子,我是老李头,今年68啦,住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋,偶尔也跟年轻人学学新鲜事儿。\n" \
               "你叫我李叔就行,有啥事儿咱们慢慢聊啊\n" \
               "哎,今儿个天气不错啊,我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极,我也跟着比划了两下,这老胳膊老腿的,原来老不舒服,活动活动舒坦多了!\n" \
@@ -165,9 +228,10 @@ if __name__ == '__main__':
     messages = []
     for line in message.split("\n"):
         messages.append({'role': 'user', 'content': line})
-    resp = extractor.extract_profile_info(current_profile, messages)
-    print(resp)
-    print(extractor.merge_profile_info(current_profile, resp))
+    resp = extractor.extract_profile_info_v2(current_profile, messages)
+    logger.warning(resp)
+    merged_profile = extractor.merge_profile_info(current_profile, resp)
+    logger.warning(merged_profile)
     current_profile = {
         'name': '李老头',
         'preferred_nickname': '李叔',
@@ -179,6 +243,6 @@ if __name__ == '__main__':
         'interests': ['养生', '下象棋'],
         'interaction_frequency': 'medium'
     }
-    resp = extractor.extract_profile_info(current_profile, messages)
-    print(resp)
-    print(extractor.merge_profile_info(current_profile, resp))
+    resp = extractor.extract_profile_info_v2(merged_profile, messages)
+    logger.warning(resp)
+    logger.warning(extractor.merge_profile_info(current_profile, resp))

+ 11 - 6
pqai_agent/utils/prompt_utils.py

@@ -39,21 +39,26 @@ def format_user_profile(profile: Dict) -> str:
     """
     fields = [
         ('nickname', '微信昵称'),
+        ('preferred_nickname', '希望对其的称呼'),
         ('name', '姓名'),
         ('avatar', '头像'),
-        ('preferred_nickname', '偏好的称呼'),
+        ('gender', '性别'),
         ('age', '年龄'),
         ('region', '地区'),
         ('health_conditions', '健康状况'),
-        ('medications', '用药信息'),
-        ('interests', '兴趣爱好')
+        ('interests', '兴趣爱好'),
+        ('interaction_frequency', '联系频率'),
+        ('flexible_params', '动态特征'),
     ]
     strings_to_join = []
     for field in fields:
-        if not profile.get(field[0], None):
+        value = profile.get(field[0], None)
+        if not value:
             continue
-        if isinstance(profile[field[0]], list):
-            value = ','.join(profile[field[0]])
+        if isinstance(value, list):
+            value = ','.join(value)
+        elif isinstance(value, dict):
+            value = ';'.join(f"{k}: {v}" for k, v in value.items())
         else:
             value = profile[field[0]]
         cur_string = f"- {field[1]}:{value}"

+ 1 - 1
pqai_agent_server/api_server.py

@@ -139,7 +139,7 @@ def get_base_prompt():
     prompt_map = {
         'greeting': prompt_templates.GENERAL_GREETING_PROMPT,
         'chitchat': prompt_templates.CHITCHAT_PROMPT_COZE,
-        'profile_extractor': prompt_templates.USER_PROFILE_EXTRACT_PROMPT,
+        'profile_extractor': prompt_templates.USER_PROFILE_EXTRACT_PROMPT_V2,
         'response_type_detector': prompt_templates.RESPONSE_TYPE_DETECT_PROMPT,
         'custom_debugging': '',
     }

+ 13 - 27
pqai_agent_server/utils/prompt_util.py

@@ -41,8 +41,7 @@ def compose_openai_chat_messages_no_time(dialogue_history, multimodal=False):
             messages.append({"role": role, "content": f'{entry["content"]}'})
     return messages
 
-
-def run_openai_chat(messages, model_name, **kwargs):
+def create_llm_client(model_name):
     volcengine_models = [
         chat_service.VOLCENGINE_MODEL_DOUBAO_PRO_32K,
         chat_service.VOLCENGINE_MODEL_DOUBAO_PRO_1_5,
@@ -72,6 +71,11 @@ def run_openai_chat(messages, model_name, **kwargs):
         )
     else:
         raise Exception("model not supported")
+    return llm_client
+
+
+def run_openai_chat(messages, model_name, **kwargs):
+    llm_client = create_llm_client(model_name)
     response = llm_client.chat.completions.create(
         messages=messages, model=model_name, **kwargs
     )
@@ -79,36 +83,18 @@ def run_openai_chat(messages, model_name, **kwargs):
     return response
 
 
-def run_extractor_prompt(req_data):
+def run_extractor_prompt(req_data) -> Dict[str, str]:
     prompt = req_data["prompt"]
     user_profile = req_data["user_profile"]
-    staff_profile = req_data["staff_profile"]
     dialogue_history = req_data["dialogue_history"]
     model_name = req_data["model_name"]
-    prompt_context = {
-        "formatted_staff_profile": format_agent_profile(staff_profile),
-        **user_profile,
-        "dialogue_history": UserProfileExtractor.compose_dialogue(dialogue_history),
-    }
-    prompt = prompt.format(**prompt_context)
-    messages = [
-        {"role": "system", "content": "你是一个专业的用户画像分析助手。"},
-        {"role": "user", "content": prompt},
-    ]
-    tools = [UserProfileExtractor.get_extraction_function()]
-    response = run_openai_chat(messages, model_name, tools=tools, temperature=0)
-    tool_calls = response.choices[0].message.tool_calls
-    if tool_calls:
-        function_call = tool_calls[0]
-        if function_call.function.name == "update_user_profile":
-            profile_info = json.loads(function_call.function.arguments)
-            return {k: v for k, v in profile_info.items() if v}
-        else:
-            logger.error("llm does not return update_user_profile")
-            return {}
-    else:
+    llm_client = create_llm_client(model_name)
+    extractor = UserProfileExtractor(model_name=model_name, llm_client=llm_client)
+    profile_to_update = extractor.extract_profile_info_v2(user_profile, dialogue_history, prompt)
+    logger.info(profile_to_update)
+    if not profile_to_update:
         return {}
-
+    return profile_to_update
 
 def run_chat_prompt(req_data):
     prompt = req_data["prompt"]