#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

import json
from typing import Dict, Any, Optional, List

import chat_service
import configs
from prompt_templates import USER_PROFILE_EXTRACT_PROMPT
from openai import OpenAI
from logging_service import logger


class UserProfileExtractor:
    """Extract structured user-profile fields from a dialogue via LLM function calling.

    The current profile and the dialogue are rendered into a prompt and sent to
    a chat-completions endpoint together with a single ``update_user_profile``
    tool definition. The arguments of the returned tool call are treated as the
    newly extracted profile fields.
    """

    def __init__(self):
        # Client credentials, endpoint and model name are all taken from chat_service.
        self.llm_client = OpenAI(
            api_key=chat_service.VOLCENGINE_API_TOKEN,
            base_url=chat_service.VOLCENGINE_BASE_URL
        )
        self.model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3

    @staticmethod
    def get_extraction_function() -> Dict:
        """Return the function-calling tool definition used for profile extraction.

        Returns:
            A tool spec describing ``update_user_profile``. Every property is
            optional (``required`` is empty), so the model may report only the
            fields it could identify.
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名，如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望客服对用户的称呼，如果用户明确提到"
                        },
                        "gender": {
                            "type": "string",
                            "description": "用户的性别，男或女，如果不能准确识别则为未知"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄，如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户常驻的地区，不是用户临时所在地"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        },
                        "interaction_frequency": {
                            "type": "string",
                            "description": "用户期望的交互频率。每2天联系小于1次为low，每天联系1次为medium，不再联系为stopped"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict]) -> str:
        """Render the extraction prompt for a profile and a dialogue.

        Args:
            user_profile: Current profile; its keys are passed to the template
                as named placeholders.
            dialogue_history: Chat messages, flattened with ``compose_dialogue``
                and exposed to the template as ``dialogue_history``.

        Returns:
            ``USER_PROFILE_EXTRACT_PROMPT`` with all placeholders filled in.

        Raises:
            KeyError: If the template names a placeholder that is neither a
                profile key nor ``dialogue_history``.
        """
        # Work on a copy so the caller's profile dict is not modified.
        context = user_profile.copy()
        context['dialogue_history'] = self.compose_dialogue(dialogue_history)
        return USER_PROFILE_EXTRACT_PROMPT.format(**context)

    @staticmethod
    def compose_dialogue(dialogue: List[Dict]) -> str:
        """Flatten chat messages into one ``[speaker] text`` line per message.

        Only ``user`` and ``assistant`` messages with non-empty content are
        kept; messages with any other role, with empty content, or missing the
        ``role``/``content`` key are skipped.

        Args:
            dialogue: Messages as dicts with ``role`` and ``content`` entries.

        Returns:
            The kept messages joined by newlines (empty string if none remain).
        """
        role_map = {'user': '用户', 'assistant': '客服'}
        lines = []
        for msg in dialogue:
            # .get() so one malformed message does not abort the whole extraction.
            content = msg.get('content')
            role = msg.get('role')
            if not content or role not in role_map:
                continue
            lines.append('[{}] {}'.format(role_map[role], content))
        return '\n'.join(lines)

    def extract_profile_info(self, user_profile, dialogue_history: List[Dict]) -> Optional[Dict]:
        """Extract profile fields from a dialogue using function calling.

        Args:
            user_profile: Current profile, used to fill the prompt template.
            dialogue_history: Chat messages to analyse.

        Returns:
            A dict of the extracted fields with falsy values removed, or
            ``None`` when LLM calls are disabled through the
            ``debug_flags.disable_llm_api_call`` config flag, when the model
            did not call ``update_user_profile``, when the call arguments are
            not a JSON object, or when any error occurs (errors are logged,
            never raised).
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None

        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )
            # Log the raw response first so it is available even if parsing below fails.
            logger.debug(response)

            # Parse the arguments of the function call.
            tool_calls = response.choices[0].message.tool_calls
            if not tool_calls:
                return None

            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None

            try:
                profile_info = json.loads(function_call.function.arguments)
            except json.JSONDecodeError:
                logger.error("无法解析提取的用户信息")
                return None
            if not isinstance(profile_info, dict):
                # Valid JSON but not an object: there are no fields to read.
                logger.error("无法解析提取的用户信息")
                return None

            # Drop falsy values (empty strings/lists, 0, None); presumably so that
            # "nothing found" entries do not overwrite existing data on merge.
            return {k: v for k, v in profile_info.items() if v}

        except Exception as e:
            # Deliberate best-effort boundary: extraction failures must not crash the caller.
            logger.error(f"用户画像提取出错: {e}")
            return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Optional[Dict]) -> Dict:
        """Merge newly extracted fields into an existing profile.

        Args:
            existing_profile: Current profile; it is not modified.
            new_info: Fields to overlay, typically the result of
                ``extract_profile_info``. ``None`` or an empty dict means
                there is nothing to merge.

        Returns:
            A new dict holding ``existing_profile`` with every key of
            ``new_info`` overwritten (list values are replaced, not combined).
        """
        merged_profile = existing_profile.copy()
        # extract_profile_info() may return None; treat that as "no changes".
        if new_info:
            merged_profile.update(new_info)
        return merged_profile

if __name__ == '__main__':
    # Manual smoke test: run the extractor on sample dialogues and print the results.
    extractor = UserProfileExtractor()
    current_profile = {
        'name': '',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 0,
        'region': '北京',
        'health_conditions': [],
        'medications': [],
        'interests': [],
        'interaction_frequency': 'medium'
    }

    # Case 1: a single farewell message.
    messages = [
        {'role': 'user', 'content': "没有任何问题放心，不会骚扰你了，再见"}
    ]
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)

    # Case 2: a multi-line self-introduction, sent as one user message per line.
    message = (
        "好的，孩子，我是老李头，今年68啦，住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋，偶尔也跟年轻人学学新鲜事儿。\n"
        "你叫我李叔就行，有啥事儿咱们慢慢聊啊\n"
        "哎，今儿个天气不错啊，我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极，我也跟着比划了两下，这老胳膊老腿的，原来老不舒服，活动活动舒坦多了!\n"
        "你吃饭了没？我们这儿中午吃的打卤面，老伴儿做的，香得很！这人老了就爱念叨些家长里短的，你可别嫌我啰嗦啊。\n"
        "对了，最近我孙子教我发语音，比打字方便多啦！就是有时候一激动，说话声音太大，把手机都给震得嗡嗡响\n"
    )
    # The trailing "\n" yields a final empty line; compose_dialogue skips empty content.
    messages = [{'role': 'user', 'content': line} for line in message.split("\n")]
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    # extract_profile_info returns None when the LLM call is disabled or fails;
    # merge an empty dict in that case instead of crashing on dict.update(None).
    print(extractor.merge_profile_info(current_profile, resp or {}))

    # Case 3: the same dialogue against a profile that is already filled in.
    current_profile = {
        'name': '李老头',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 68,
        'region': '北京市海淀区',
        'health_conditions': [],
        'medications': [],
        'interests': ['养生', '下象棋'],
        'interaction_frequency': 'medium'
    }
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp or {}))