#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
"""Extract user-profile fields from dialogue text via LLM function calling."""

import json
import logging
import os
from typing import Dict, Any, Optional

from openai import OpenAI

import global_flags
from prompt_templates import USER_PROFILE_EXTRACT_PROMPT

# SECURITY(review): this credential is committed in source. It is kept only as
# a last-resort fallback so existing deployments keep working; set ARK_API_KEY
# in the environment instead, then rotate and delete this literal.
_FALLBACK_API_KEY = '5e275c38-44fd-415f-abcf-4b59f6377f72'
_DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
_DEFAULT_MODEL_NAME = 'ep-20250307150409-4blz9'


class UserProfileExtractor:
    """Wrap an OpenAI-compatible chat client to extract user-profile fields."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """Create the LLM client.

        Args:
            api_key: API key for the endpoint. When omitted, the ARK_API_KEY
                environment variable is used, then the built-in fallback.
            base_url: Endpoint base URL; defaults to the built-in URL.
            model_name: Model/endpoint id passed as ``model`` on each request;
                defaults to the built-in id.
        """
        self.llm_client = OpenAI(
            api_key=api_key or os.environ.get("ARK_API_KEY") or _FALLBACK_API_KEY,
            base_url=base_url or _DEFAULT_BASE_URL,
        )
        self.model_name = model_name or _DEFAULT_MODEL_NAME

    def get_extraction_function(self) -> Dict:
        """Return the function-calling tool schema used for profile extraction.

        The schema declares a single function, ``update_user_profile``, whose
        parameters are all optional.
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名,如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望对其的称呼,如果能够准确识别"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄,如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户常驻的地区,不是用户临时所在地"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: str) -> str:
        """Build the extraction prompt from the current profile and dialogue.

        The profile is shallow-copied, ``dialogue_history`` is added under the
        key of the same name, and the result is used as the keyword arguments
        for ``USER_PROFILE_EXTRACT_PROMPT.format``. The caller's dict is not
        modified.
        """
        context = user_profile.copy()
        context['dialogue_history'] = dialogue_history
        return USER_PROFILE_EXTRACT_PROMPT.format(**context)

    def extract_profile_info(self, user_profile, dialogue_history: str) -> Optional[Dict]:
        """Extract profile fields from ``dialogue_history`` via function calling.

        Args:
            user_profile: Current profile dict, interpolated into the prompt.
            dialogue_history: Dialogue text to analyse.

        Returns:
            A dict holding only the truthy extracted fields, or ``None`` when
            LLM calls are disabled via ``global_flags.DISABLE_LLM_API_CALL``,
            the first tool call is missing or is not ``update_user_profile``,
            its arguments are not valid JSON, or any other error occurs (the
            error is logged, never raised).
        """
        if global_flags.DISABLE_LLM_API_CALL:
            return None

        try:
            logging.debug("try to extract profile from message: %s", dialogue_history)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {
                        "role": "user",
                        "content": self.generate_extraction_prompt(user_profile, dialogue_history),
                    },
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )

            # Parse the arguments of the function call.
            tool_calls = response.choices[0].message.tool_calls
            logging.debug(response)
            if not tool_calls:
                return None

            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None

            try:
                profile_info = json.loads(function_call.function.arguments)
            except json.JSONDecodeError:
                logging.error("无法解析提取的用户信息")
                return None
            # Drop empty/falsy values so they cannot overwrite known data
            # when the result is merged into an existing profile.
            return {k: v for k, v in profile_info.items() if v}
        except Exception as e:
            # Best-effort boundary: extraction failures must not propagate.
            logging.error(f"用户画像提取出错: {e}")
            return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Optional[Dict]) -> Dict:
        """Return a copy of ``existing_profile`` updated with ``new_info``.

        ``new_info`` may be ``None`` or empty (as returned by
        ``extract_profile_info`` on failure); in that case an unchanged
        shallow copy of ``existing_profile`` is returned. Neither input is
        modified.
        """
        merged_profile = existing_profile.copy()
        if new_info:
            merged_profile.update(new_info)
        return merged_profile


if __name__ == '__main__':
    extractor = UserProfileExtractor()
    current_profile = {
        'name': '',
        'preferred_nickname': '李叔',
        'age': 0,
        'region': '北京',
        'health_conditions': [],
        'medications': [],
        'interests': []
    }

    message = "我回天津老家了"
    resp = extractor.extract_profile_info(current_profile, message)
    print(resp)

    message = "好的,孩子,我是老李头,今年68啦,住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋,偶尔也跟年轻人学学新鲜事儿。\n" \
              "你叫我李叔就行,有啥事儿咱们慢慢聊啊\n" \
              "哎,今儿个天气不错啊,我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极,我也跟着比划了两下,这老胳膊老腿的,原来老不舒服,活动活动舒坦多了!\n" \
              "你吃饭了没?我们这儿中午吃的打卤面,老伴儿做的,香得很!这人老了就爱念叨些家长里短的,你可别嫌我啰嗦啊。\n" \
              "对了,最近我孙子教我发语音,比打字方便多啦!就是有时候一激动,说话声音太大,把手机都给震得嗡嗡响\n"
    resp = extractor.extract_profile_info(current_profile, message)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp))

    current_profile = {
        'name': '李老头',
        'preferred_nickname': '李叔',
        'age': 68,
        'region': '北京市海淀区',
        'health_conditions': [],
        'medications': [],
        'interests': ['养生', '下象棋']
    }
    resp = extractor.extract_profile_info(current_profile, message)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp))