#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# Server / AgentCoreService

import json
from typing import Dict, Any, Optional
from datetime import datetime
from openai import OpenAI
import logging

import global_flags


class UserProfileExtractor:
    """Extract user-profile fields from dialogue text via LLM function calling.

    The extractor sends the known profile plus a dialogue transcript to a chat
    completion endpoint, offers the model a single ``update_user_profile``
    tool, and parses the tool-call arguments into a dict of profile updates.
    """

    # SECURITY(review): this credential is committed in source. It is kept only
    # as a last-resort fallback so existing callers keep working; prefer passing
    # ``api_key`` or setting the ARK_API_KEY environment variable, and rotate
    # this key.
    _FALLBACK_API_KEY = '5e275c38-44fd-415f-abcf-4b59f6377f72'
    _DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
    _DEFAULT_MODEL_NAME = 'ep-20250307150409-4blz9'

    # Profile fields interpolated into the extraction prompt template.
    _PROMPT_FIELDS = (
        'name',
        'preferred_nickname',
        'age',
        'region',
        'health_conditions',
        'interests',
    )

    def __init__(self,
                 api_key: Optional[str] = None,
                 base_url: Optional[str] = None,
                 model_name: Optional[str] = None):
        """Create the LLM client.

        Args:
            api_key: API key for the endpoint. When omitted, the ARK_API_KEY
                environment variable is used, then the built-in fallback.
            base_url: Endpoint base URL; defaults to the built-in one.
            model_name: Model/endpoint id; defaults to the built-in one.
        """
        import os  # local import: keeps this block self-contained

        self.llm_client = OpenAI(
            api_key=api_key or os.environ.get('ARK_API_KEY') or self._FALLBACK_API_KEY,
            base_url=base_url or self._DEFAULT_BASE_URL,
        )
        self.model_name = model_name or self._DEFAULT_MODEL_NAME

    def get_extraction_function(self) -> Dict:
        """Return the function-calling tool schema used for profile extraction.

        The schema declares one function, ``update_user_profile``, whose
        parameters are all optional.
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名，如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望对其的称呼，如果能够准确识别"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄，如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户所在地区"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: str) -> str:
        """Build the user prompt that asks the model to refine the profile.

        Args:
            user_profile: Currently known profile. Fields the template needs
                but the profile lacks are rendered as empty strings; extra
                keys are ignored. ``None`` is treated as an empty profile.
            dialogue_history: Dialogue text to analyse.

        Returns:
            The formatted prompt text.
        """
        # Seed every template field so a partial profile cannot raise KeyError.
        context = {field: '' for field in self._PROMPT_FIELDS}
        context.update(user_profile or {})
        context['dialogue_history'] = dialogue_history
        return """
请在已有的用户画像的基础上，仔细分析以下对话内容，完善用户的画像信息。
已知信息（可能为空）：
- 姓名：{name}
- 希望的称呼：{preferred_nickname}
- 年龄：{age}
- 地区：{region}
- 健康状况：{health_conditions}
- 兴趣爱好：{interests}

对话历史：
{dialogue_history}

提取要求：
1. 尽可能准确地识别用户的年龄、兴趣爱好、健康状况
2. 关注用户生活、家庭等隐性信息
3. 信息提取需要有较高的置信度，兴趣爱好只保留用户明确喜欢且最关键的5项
4. 如果无法确定具体信息，请不要猜测

请使用update_user_profile函数返回需要更新的信息，注意不要返回无需更新的信息。
""".format(**context)

    @staticmethod
    def _parse_profile_arguments(arguments) -> Optional[Dict]:
        """Decode tool-call JSON arguments into a dict of non-empty fields."""
        try:
            profile_info = json.loads(arguments)
        except (json.JSONDecodeError, TypeError):
            logging.error("无法解析提取的用户信息")
            return None
        if not isinstance(profile_info, dict):
            # Valid JSON, but not the object the tool schema asks for.
            logging.error("无法解析提取的用户信息")
            return None
        # Drop falsy values ('' / 0 / [] / None) so they never overwrite data.
        return {k: v for k, v in profile_info.items() if v}

    def extract_profile_info(self, user_profile, dialogue_history: str) -> Optional[Dict]:
        """Extract profile updates from a dialogue using function calling.

        Args:
            user_profile: Currently known profile, passed into the prompt.
            dialogue_history: Dialogue text to analyse.

        Returns:
            A dict of non-empty extracted fields, or ``None`` when LLM calls
            are disabled through ``global_flags.DISABLE_LLM_API_CALL``, the
            model made no ``update_user_profile`` call, the arguments could
            not be parsed, or any error occurred. Errors are logged, never
            raised.
        """
        if global_flags.DISABLE_LLM_API_CALL:
            return None

        try:
            logging.debug("try to extract profile from message: %s", dialogue_history)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": self.generate_extraction_prompt(user_profile, dialogue_history)}
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )
            # Log before parsing so a malformed response is still visible.
            logging.debug(response)

            tool_calls = response.choices[0].message.tool_calls
            if not tool_calls:
                return None

            # Only the first tool call is considered.
            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None
            return self._parse_profile_arguments(function_call.function.arguments)
        except Exception as e:
            # Best-effort boundary: extraction failures must not break callers.
            logging.exception("用户画像提取出错: %s", e)
            return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Dict) -> Dict:
        """Merge newly extracted fields into a copy of the existing profile.

        Args:
            existing_profile: Current profile; it is not modified. ``None`` is
                treated as an empty profile.
            new_info: Extracted updates; keys present here overwrite the
                existing values. ``None`` or empty means "no updates", which
                is what ``extract_profile_info`` returns on failure.

        Returns:
            A new (shallow-copied) dict with the updates applied.
        """
        merged_profile = existing_profile.copy() if existing_profile is not None else {}
        if new_info:
            merged_profile.update(new_info)
        return merged_profile

if __name__ == '__main__':
    # Manual smoke test: run the extractor against one sample monologue, first
    # with an empty profile and then with a partially filled one, printing the
    # raw extraction result and the merged profile each time.
    extractor = UserProfileExtractor()
    message = (
        "好的，孩子，我是老李头，今年68啦，住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋，偶尔也跟年轻人学学新鲜事儿。\n"
        "你叫我李叔就行，有啥事儿咱们慢慢聊啊\n"
        "哎，今儿个天气不错啊，我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极，我也跟着比划了两下，这老胳膊老腿的，原来老不舒服，活动活动舒坦多了!\n"
        "你吃饭了没？我们这儿中午吃的打卤面，老伴儿做的，香得很！这人老了就爱念叨些家长里短的，你可别嫌我啰嗦啊。\n"
        "对了，最近我孙子教我发语音，比打字方便多啦！就是有时候一激动，说话声音太大，把手机都给震得嗡嗡响\n"
    )
    sample_profiles = [
        {
            'name': '',
            'preferred_nickname': '',
            'age': 0,
            'region': '',
            'health_conditions': [],
            'medications': [],
            'interests': [],
        },
        {
            'name': '李老头',
            'preferred_nickname': '李叔',
            'age': 68,
            'region': '北京市海淀区',
            'health_conditions': [],
            'medications': [],
            'interests': ['养生', '下象棋'],
        },
    ]
    for current_profile in sample_profiles:
        resp = extractor.extract_profile_info(current_profile, message)
        print(resp)
        # extract_profile_info returns None when LLM calls are disabled or the
        # extraction fails; merge an empty update instead of passing None on.
        print(extractor.merge_profile_info(current_profile, resp or {}))