123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- #! /usr/bin/env python
- # -*- coding: utf-8 -*-
- # vim:fenc=utf-8
- import json
- from typing import Dict, Any, Optional, List
- import chat_service
- import configs
- from prompt_templates import USER_PROFILE_EXTRACT_PROMPT
- from openai import OpenAI
- from logging_service import logger
class UserProfileExtractor:
    """Extract structured user-profile fields from a dialogue via LLM function calling.

    The extractor sends the current profile and the dialogue to a chat
    completion endpoint together with a single tool definition
    (``update_user_profile``) and reads the profile fields back from the
    arguments of the tool call the model returns.
    """

    def __init__(self):
        """Build the OpenAI-compatible client from the settings in ``chat_service``."""
        self.llm_client = OpenAI(
            api_key=chat_service.VOLCENGINE_API_TOKEN,
            base_url=chat_service.VOLCENGINE_BASE_URL
        )
        self.model_name = chat_service.VOLCENGINE_MODEL_DEEPSEEK_V3

    @staticmethod
    def get_extraction_function() -> Dict:
        """Return the function-calling tool definition used for profile extraction.

        Every property is optional (``required`` is empty), so the model only
        needs to fill in the fields it could identify.
        """
        return {
            "type": "function",
            "function": {
                "name": "update_user_profile",
                "description": "从用户对话中提取并更新用户的个人信息",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "用户的姓名,如果能够准确识别"
                        },
                        "preferred_nickname": {
                            "type": "string",
                            "description": "用户希望客服对用户的称呼,如果用户明确提到"
                        },
                        "gender": {
                            "type": "string",
                            "description": "用户的性别,男或女,如果不能准确识别则为未知"
                        },
                        "age": {
                            "type": "integer",
                            "description": "用户的年龄,如果能够准确识别"
                        },
                        "region": {
                            "type": "string",
                            "description": "用户常驻的地区,不是用户临时所在地"
                        },
                        "interests": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提到的自己的兴趣爱好"
                        },
                        "health_conditions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "用户提及的健康状况"
                        },
                        "interaction_frequency": {
                            "type": "string",
                            "description": "用户期望的交互频率。每2天联系小于1次为low,每天联系1次为medium,不再联系为stopped"
                        }
                    },
                    "required": []
                }
            }
        }

    def generate_extraction_prompt(self, user_profile: Dict, dialogue_history: List[Dict]) -> str:
        """Render the extraction prompt for the given profile and dialogue.

        The profile's keys are passed to ``USER_PROFILE_EXTRACT_PROMPT.format``
        as keyword arguments, together with a ``dialogue_history`` entry holding
        the dialogue rendered by :meth:`compose_dialogue`. The caller's profile
        dict is not modified.

        Raises:
            KeyError: if the template references a field that is absent from
                ``user_profile`` (standard ``str.format`` behaviour).
        """
        context = user_profile.copy()
        context['dialogue_history'] = self.compose_dialogue(dialogue_history)
        return USER_PROFILE_EXTRACT_PROMPT.format(**context)

    @staticmethod
    def compose_dialogue(dialogue: List[Dict]) -> str:
        """Render a list of chat messages as newline-separated ``[speaker] text`` lines.

        Only ``user`` and ``assistant`` messages with non-empty content are
        kept. Messages that lack a ``role`` or ``content`` key are skipped
        rather than raising, so one malformed entry cannot abort the whole
        extraction.

        Returns:
            The rendered dialogue, or an empty string when nothing qualifies.
        """
        role_map = {'user': '用户', 'assistant': '客服'}
        messages = []
        for msg in dialogue:
            content = msg.get('content')
            speaker = role_map.get(msg.get('role'))
            if not content or speaker is None:
                continue
            messages.append('[{}] {}'.format(speaker, content))
        return '\n'.join(messages)

    @staticmethod
    def _parse_profile_arguments(arguments: str) -> Optional[Dict]:
        """Decode tool-call arguments into a dict containing only truthy values."""
        try:
            profile_info = json.loads(arguments)
        except (json.JSONDecodeError, TypeError):
            logger.error("无法解析提取的用户信息")
            return None
        # Valid JSON that is not an object (e.g. ``null`` or a list) carries no
        # profile fields and would break the ``.items()`` call below.
        if not isinstance(profile_info, dict):
            logger.error("无法解析提取的用户信息")
            return None
        # Drop empty/falsy values so they cannot overwrite existing profile data.
        return {k: v for k, v in profile_info.items() if v}

    def extract_profile_info(self, user_profile, dialogue_history: List[Dict]) -> Optional[Dict]:
        """Extract profile fields from the dialogue using function calling.

        Args:
            user_profile: Current profile dict, used to fill the prompt template.
            dialogue_history: Chat messages with ``role`` and ``content`` keys.

        Returns:
            A dict of the non-empty fields reported by the model, or ``None``
            when LLM calls are disabled through the ``disable_llm_api_call``
            debug flag, when the model made no ``update_user_profile`` call,
            when the arguments cannot be parsed, or when any error occurs.
            Errors are logged, never raised.
        """
        if configs.get().get('debug_flags', {}).get('disable_llm_api_call', False):
            return None
        try:
            logger.debug("try to extract profile from message: {}".format(dialogue_history))
            prompt = self.generate_extraction_prompt(user_profile, dialogue_history)
            response = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": '你是一个专业的用户画像分析助手。'},
                    {"role": "user", "content": prompt}
                ],
                tools=[self.get_extraction_function()],
                temperature=0
            )
            # Log the raw response before touching it so that malformed
            # responses are still visible in the debug log.
            logger.debug(response)
            tool_calls = response.choices[0].message.tool_calls
            if not tool_calls:
                return None
            function_call = tool_calls[0]
            if function_call.function.name != 'update_user_profile':
                return None
            return self._parse_profile_arguments(function_call.function.arguments)
        except Exception as e:
            # Best-effort feature: extraction failures must not break the caller.
            logger.error(f"用户画像提取出错: {e}")
            return None

    def merge_profile_info(self, existing_profile: Dict, new_info: Optional[Dict]) -> Dict:
        """Return a copy of ``existing_profile`` updated with ``new_info``.

        Values in ``new_info`` replace the values stored under the same keys.
        ``new_info`` may be ``None`` or empty (as returned by
        :meth:`extract_profile_info` when nothing was extracted); in that case
        an unchanged shallow copy is returned. ``existing_profile`` itself is
        never modified.
        """
        merged_profile = existing_profile.copy()
        if new_info:
            merged_profile.update(new_info)
        return merged_profile
if __name__ == '__main__':
    # Manual smoke test: runs three extractions against the configured LLM
    # endpoint and prints the raw extraction results and the merged profiles.
    extractor = UserProfileExtractor()

    # Case 1: a short farewell message against a sparse profile.
    current_profile = {
        'name': '',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 0,
        'region': '北京',
        'health_conditions': [],
        'medications': [],
        'interests': [],
        'interaction_frequency': 'medium'
    }
    messages = [
        {'role': 'user', 'content': "没有任何问题放心,不会骚扰你了,再见"}
    ]
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)

    # Case 2: a longer self-introduction, sent as one user message per line.
    message = "好的,孩子,我是老李头,今年68啦,住在北京海淀区。平时喜欢在微信上跟老伙伴们聊聊养生、下下象棋,偶尔也跟年轻人学学新鲜事儿。\n" \
              "你叫我李叔就行,有啥事儿咱们慢慢聊啊\n" \
              "哎,今儿个天气不错啊,我刚才还去楼下小公园溜达了一圈儿。碰到几个老伙计在打太极,我也跟着比划了两下,这老胳膊老腿的,原来老不舒服,活动活动舒坦多了!\n" \
              "你吃饭了没?我们这儿中午吃的打卤面,老伴儿做的,香得很!这人老了就爱念叨些家长里短的,你可别嫌我啰嗦啊。\n" \
              "对了,最近我孙子教我发语音,比打字方便多啦!就是有时候一激动,说话声音太大,把手机都给震得嗡嗡响\n"
    messages = [{'role': 'user', 'content': line} for line in message.split("\n")]
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    # extract_profile_info returns None when the LLM call is disabled or
    # fails; fall back to an empty update so the merge does not raise.
    print(extractor.merge_profile_info(current_profile, resp or {}))

    # Case 3: the same dialogue against a profile that is already filled in.
    current_profile = {
        'name': '李老头',
        'preferred_nickname': '李叔',
        "gender": "男",
        'age': 68,
        'region': '北京市海淀区',
        'health_conditions': [],
        'medications': [],
        'interests': ['养生', '下象棋'],
        'interaction_frequency': 'medium'
    }
    resp = extractor.extract_profile_info(current_profile, messages)
    print(resp)
    print(extractor.merge_profile_info(current_profile, resp or {}))
|