#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
"""Extract user-profile fields from a dialogue via LLM function calling."""

import json
from typing import Dict, Any, Optional, List
import chat_service
import configs
from prompt_templates import USER_PROFILE_EXTRACT_PROMPT
from openai import OpenAI
from logging_service import logger


class UserProfileExtractor:
    """Ask an LLM to pull user-profile fields out of a dialogue.

    The model is offered a single tool, ``update_user_profile``; the arguments
    of the tool call it returns are treated as the extracted profile fields.
    """

    def __init__(self):
        # Endpoint, credentials and model name all come from ``chat_service``.
        self.llm_client = OpenAI(
            api_key=chat_service.VOLCENGINE_API_TOKEN,
            base_url=chat_service.VOLCENGINE_BASE_URL
        )
        self.model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3

    @staticmethod
    def get_extraction_function() -> Dict:
        """Return the function-calling tool schema used for profile extraction.

        Every property is optional (``required`` is empty), so the model may
        report only the fields it could identify.
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名,如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望客服对用户的称呼,如果用户明确提到"
                        },
                        "gender": {
                            "type": "string",
                            "description": "用户的性别,男或女,如果不能准确识别则为未知"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄,如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户常驻的地区,不是用户临时所在地"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        },
                        # NOTE(review): the description below only defines
                        # low / medium / stopped -- confirm whether other
                        # levels are expected by downstream consumers.
                        "interaction_frequency": {
                            "type": "string",
                            "description": "用户期望的交互频率。每2天联系小于1次为low,每天联系1次为medium,不再联系为stopped"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict]) -> str:
        """Build the extraction prompt from the current profile and dialogue.

        The profile is shallow-copied so the caller's dict is not modified; the
        rendered dialogue is added under ``dialogue_history`` and the result is
        used as keyword arguments to ``USER_PROFILE_EXTRACT_PROMPT.format``.
        """
        context = user_profile.copy()
        context['dialogue_history'] = self.compose_dialogue(dialogue_history)
        return USER_PROFILE_EXTRACT_PROMPT.format(**context)

    @staticmethod
    def compose_dialogue(dialogue: List[Dict]) -> str:
        """Render dialogue messages as newline-separated ``[speaker] text`` lines.

        Messages with empty or missing content, or with a role other than
        ``user`` / ``assistant``, are skipped.
        """
        role_map = {'user': '用户', 'assistant': '客服'}
        messages = []
        for msg in dialogue:
            # ``.get`` so a malformed message is skipped instead of raising.
            content = msg.get('content')
            speaker = role_map.get(msg.get('role'))
            if not content or speaker is None:
                continue
            messages.append('[{}] {}'.format(speaker, content))
        return '\n'.join(messages)

    def extract_profile_info(self, user_profile: Dict, dialogue_history: List[Dict]) -> Optional[Dict]:
        """Extract profile fields from the dialogue using function calling.

        Returns:
            A dict holding only the truthy extracted fields, or ``None`` when
            LLM calls are disabled through the ``debug_flags`` config, when the
            model does not call ``update_user_profile``, or when any error
            occurs (errors are logged, never raised).
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None

        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history)

            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )

            # Parse the arguments of the returned function call.
            tool_calls = response.choices[0].message.tool_calls
            logger.debug(response)

            if not tool_calls:
                return None

            # Only the first tool call is considered.
            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None

            try:
                profile_info = json.loads(function_call.function.arguments)
            except json.JSONDecodeError:
                logger.error("无法解析提取的用户信息")
                return None

            # Drop falsy values (empty strings/lists, 0, None) so that an
            # unidentified field never overwrites existing profile data.
            return {k: v for k, v in profile_info.items() if v}

        except Exception as e:
            # Best-effort boundary: extraction failures must not break callers.
            logger.error(f"用户画像提取出错: {e}")

        return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Optional[Dict]) -> Dict:
        """Merge newly extracted fields into a copy of the existing profile.

        ``new_info`` may be ``None`` or empty (``extract_profile_info`` returns
        ``None`` on failure); in that case an unchanged copy is returned.
        """
        merged_profile = existing_profile.copy()
        if new_info:
            merged_profile.update(new_info)
        return merged_profile


if __name__ == '__main__':
    # Manual smoke test against the live LLM endpoint.
    extractor = UserProfileExtractor()
    current_profile = {
        'name': '',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 0,
        'region': '北京',
        'health_conditions': [],
        'medications': [],
        'interests': [],
        'interaction_frequency': 'medium'
    }

    messages = [
        {'role': 'user', 'content': "没有任何问题放心,不会骚扰你了,再见"}
    ]

    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)

    message = (
        "好的,孩子,我是老李头,今年68啦,住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋,偶尔也跟年轻人学学新鲜事儿。\n"
        "你叫我李叔就行,有啥事儿咱们慢慢聊啊\n"
        "哎,今儿个天气不错啊,我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极,我也跟着比划了两下,这老胳膊老腿的,原来老不舒服,活动活动舒坦多了!\n"
        "你吃饭了没?我们这儿中午吃的打卤面,老伴儿做的,香得很!这人老了就爱念叨些家长里短的,你可别嫌我啰嗦啊。\n"
        "对了,最近我孙子教我发语音,比打字方便多啦!就是有时候一激动,说话声音太大,把手机都给震得嗡嗡响\n"
    )
    # One message per line; the trailing empty line is skipped by
    # compose_dialogue because its content is empty.
    messages = []
    for line in message.split("\n"):
        messages.append({'role': 'user', 'content': line})

    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp))

    current_profile = {
        'name': '李老头',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 68,
        'region': '北京市海淀区',
        'health_conditions': [],
        'medications': [],
        'interests': ['养生', '下象棋'],
        'interaction_frequency': 'medium'
    }
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp))