#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# Server / AgentCoreService

import json
from typing import Dict, Optional, List

from pqai_agent import chat_service, configs
from pqai_agent.prompt_templates import USER_PROFILE_EXTRACT_PROMPT, USER_PROFILE_EXTRACT_PROMPT_V2
from openai import OpenAI
from pqai_agent.logging_service import logger
from pqai_agent.utils import prompt_utils


class UserProfileExtractor:
    """Extract user-profile fields from a dialogue by prompting an LLM.

    Two extraction strategies are offered: ``extract_profile_info`` relies on
    function calling, ``extract_profile_info_v2`` asks the model for a JSON
    document. ``merge_profile_info`` folds the result into a stored profile.
    """

    # Profile keys that merge_profile_info() accepts from extraction output;
    # any other key is logged as unknown and dropped.
    FIELDS = [
        "name",
        "preferred_nickname",
        "gender",
        "age",
        "region",
        "interests",
        "health_conditions",
        "interaction_frequency",
        "flexible_params"
    ]

    def __init__(self, model_name=None, llm_client=None):
        """Create an extractor.

        :param model_name: model identifier sent with every completion
            request; falls back to ``chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3``.
        :param llm_client: client exposing ``chat.completions.create``; when
            omitted an ``OpenAI`` client is built from the
            ``chat_service.VOLCENGINE_*`` token and base URL.
        """
        if not llm_client:
            self.llm_client = OpenAI(
                api_key=chat_service.VOLCENGINE_API_TOKEN,
                base_url=chat_service.VOLCENGINE_BASE_URL
            )
        else:
            self.llm_client = llm_client
        if not model_name:
            model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3
        self.model_name = model_name

    @staticmethod
    def get_extraction_function() -> Dict:
        """Return the function-calling tool schema used for profile extraction.

        The schema declares a single function, ``update_user_profile``, whose
        parameters are all optional (``required`` is empty).
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名，如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望客服对用户的称呼，如果用户明确提到"
                        },
                        "gender": {
                            "type": "string",
                            "description": "用户的性别，男或女，如果不能准确识别则为未知"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄，如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户常驻的地区，不是用户临时所在地"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        },
                        "interaction_frequency": {
                            "type": "string",
                            "description": "用户期望的交互频率。每2天联系小于1次为low，每天联系1次为medium，未来均不再联系为stopped"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict],
                                   prompt_template: Optional[str] = None) -> str:
        """Render the extraction prompt for a profile and a dialogue.

        The template is formatted with every key of ``user_profile`` plus
        ``dialogue_history`` (the rendered transcript) and
        ``formatted_user_profile``.

        :param user_profile: current profile; its keys become template fields.
        :param dialogue_history: chat messages with ``role`` and ``content``.
        :param prompt_template: ``str.format`` template; ``None`` selects
            ``USER_PROFILE_EXTRACT_PROMPT``.
        :return: the formatted prompt text.
        :raises KeyError: if the template names a field missing from the context.
        """
        # Resolve the default at call time, mirroring extract_profile_info_v2.
        if prompt_template is None:
            prompt_template = USER_PROFILE_EXTRACT_PROMPT
        context = user_profile.copy()
        context['dialogue_history'] = self.compose_dialogue(dialogue_history)
        context['formatted_user_profile'] = prompt_utils.format_user_profile(user_profile)
        return prompt_template.format(**context)

    @staticmethod
    def compose_dialogue(dialogue: List[Dict]) -> str:
        """Render chat messages as one ``[speaker] text`` line per message.

        Messages with empty or missing content, or with a role other than
        ``user`` / ``assistant``, are skipped.
        """
        role_map = {'user': '用户', 'assistant': '客服'}
        lines = []
        for msg in dialogue:
            content = msg.get('content')
            role = msg.get('role')
            # Malformed or irrelevant messages are skipped rather than
            # aborting the whole extraction with a KeyError.
            if not content or role not in role_map:
                continue
            lines.append('[{}] {}'.format(role_map[role], content))
        return '\n'.join(lines)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove Markdown code-fence markers and surrounding whitespace."""
        # The tagged fence must go first: stripping the bare fence first would
        # leave a stray "json" prefix that breaks json.loads.
        return text.replace("```json", "").replace("```", "").strip()

    def extract_profile_info(self, user_profile, dialogue_history: List[Dict]) -> Optional[Dict]:
        """Extract profile fields through function calling.

        :param user_profile: current profile, used to build the prompt.
        :param dialogue_history: chat messages with ``role`` and ``content``.
        :return: the truthy-valued arguments of the ``update_user_profile``
            call, or ``None`` when LLM calls are disabled by configuration,
            the model made no such call, or any error occurred.
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None

        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )

            # Parse the arguments of the function call, if the model made one.
            tool_calls = response.choices[0].message.tool_calls
            logger.debug(response)
            if not tool_calls:
                return None
            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None
            try:
                profile_info = json.loads(function_call.function.arguments)
            except json.JSONDecodeError:
                logger.error("无法解析提取的用户信息")
                return None
            # Keep only fields the model actually filled in.
            return {k: v for k, v in profile_info.items() if v}

        except Exception as e:
            # Best-effort: extraction failures must not break the caller.
            logger.error(f"用户画像提取出错: {e}")
            return None

    def extract_profile_info_v2(self, user_profile: Dict, dialogue_history: List[Dict], prompt_template: Optional[str] = None) -> Optional[Dict]:
        """Extract profile fields by asking the model for a JSON document.

        :param user_profile: current profile, used to build the prompt.
        :param dialogue_history: chat messages with ``role`` and ``content``.
        :param prompt_template: optional custom prompt template; defaults to
            ``USER_PROFILE_EXTRACT_PROMPT_V2``.
        :return: the parsed JSON object, or ``None`` when LLM calls are
            disabled by configuration, the reply is not a JSON object, or any
            error occurred.
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None

        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt_template = prompt_template or USER_PROFILE_EXTRACT_PROMPT_V2
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history, prompt_template)
            logger.debug(prompt)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                temperature=0
            )
            # The message content may be None; treat that as an empty reply.
            json_data = self._strip_code_fence(response.choices[0].message.content or "")
            try:
                profile_info = json.loads(json_data)
            except json.JSONDecodeError as e:
                logger.error(f"Error in JSON decode: {e}, original input: {json_data}")
                return None
            if not isinstance(profile_info, dict):
                # Callers index the result by field name, so only objects are usable.
                logger.error(f"Extracted profile is not a JSON object: {json_data}")
                return None
            return profile_info

        except Exception as e:
            # Best-effort: extraction failures must not break the caller.
            logger.error(f"用户画像提取出错: {e}")
            return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Dict) -> Dict:
        """Merge newly extracted fields into a copy of the existing profile.

        :param existing_profile: stored profile; it is not modified.
        :param new_info: extraction output; ``None`` or empty means there is
            nothing to merge.
        :return: a new dict in which every key of ``new_info`` listed in
            ``FIELDS`` overrides the existing value. Other keys are logged
            and ignored.
        """
        merged_profile = existing_profile.copy()
        # Both extractors return None on failure; that must not crash the merge.
        if not new_info:
            return merged_profile
        for field, value in new_info.items():
            if field in self.FIELDS:
                merged_profile[field] = value
            else:
                logger.warning(f"Unknown field in new profile: {field}")
        return merged_profile

if __name__ == '__main__':
    # Manual smoke test: runs two extraction rounds against the live LLM
    # endpoint and logs the extracted and merged profiles.
    from pqai_agent import logging_service
    logging_service.setup_root_logger()
    config = configs.get()
    # Force real API calls even when the 'debug_flags' section is absent.
    config.setdefault('debug_flags', {})['disable_llm_api_call'] = False
    extractor = UserProfileExtractor()
    current_profile = {
        'name': '',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 0,
        'region': '北京',
        'health_conditions': [],
        'medications': [],
        'interests': [],
        'interaction_frequency': 'medium'
    }

    message = "好的，孩子，我是老李头，今年68啦，住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋，偶尔也跟年轻人学学新鲜事儿。\n" \
              "你叫我李叔就行，有啥事儿咱们慢慢聊啊\n" \
              "哎，今儿个天气不错啊，我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极，我也跟着比划了两下，这老胳膊老腿的，原来老不舒服，活动活动舒坦多了!\n" \
              "你吃饭了没？我们这儿中午吃的打卤面，老伴儿做的，香得很！这人老了就爱念叨些家长里短的，你可别嫌我啰嗦啊。\n" \
              "对了，最近我孙子教我发语音，比打字方便多啦！就是有时候一激动，说话声音太大，把手机都给震得嗡嗡响\n"
    # One user message per non-empty line; the trailing newline would
    # otherwise produce an empty message.
    messages = [{'role': 'user', 'content': line} for line in message.split("\n") if line]

    # Round 1: extract from a sparse profile and merge the result.
    resp = extractor.extract_profile_info_v2(current_profile, messages)
    logger.warning(resp)
    # Extraction returns None on failure; merge an empty update in that case.
    merged_profile = extractor.merge_profile_info(current_profile, resp or {})
    logger.warning(merged_profile)

    # Round 2: extract again using the merged profile as prompt context, then
    # merge the result into a hand-written, already populated profile.
    current_profile = {
        'name': '李老头',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 68,
        'region': '北京市海淀区',
        'health_conditions': [],
        'medications': [],
        'interests': ['养生', '下象棋'],
        'interaction_frequency': 'medium'
    }
    resp = extractor.extract_profile_info_v2(merged_profile, messages)
    logger.warning(resp)
    logger.warning(extractor.merge_profile_info(current_profile, resp or {}))