Server
/
AgentCoreService


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
import datetime
import json
import logging
import os
import random

from openai import OpenAI
from pymysql.cursors import DictCursor
from tqdm import tqdm

from pqai_agent.agents.message_push_agent import MessagePushAgent
from pqai_agent.agents.message_reply_agent import MessageReplyAgent
from pqai_agent.database import MySQLManager

def fetch_deepseek_completion(prompt, output_type='text'):
    """
    Send a single-turn prompt to the DeepSeek chat API and return the reply.

    :param prompt: text sent as the only ``user`` message of the request.
    :param output_type: ``"json"`` asks the API for a JSON-object response and
        parses it with ``json.loads``; any other value requests plain text.
    :return: the parsed JSON value when ``output_type`` is ``"json"``,
        otherwise the message content exactly as returned by the API.
    """
    # Prefer a key supplied through the environment so it can be rotated
    # without touching the code.
    # NOTE(review): the literal fallback is a credential committed to source
    # control; it is kept only so existing runs keep working and should be
    # revoked and removed.
    api_key = (
        os.environ.get("DEEPSEEK_API_KEY")
        or 'sk-cfd2df92c8864ab999d66a615ee812c5'
    )
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.deepseek.com"
    )

    # The same flag drives both the requested response format and the
    # post-processing of the reply.
    wants_json = output_type == "json"
    response_format = {"type": "json_object" if wants_json else "text"}

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="deepseek-chat",
        response_format=response_format,
    )
    response = chat_completion.choices[0].message.content
    if wants_json:
        return json.loads(response)

    return response


def get_profile_info(user_id_, user_type):
    """
    Run the profile lookup query for an end user or a staff member.

    :param user_id_: value bound to ``third_party_user_id`` in the query.
    :param user_type: ``"user"`` selects avatar and profile from
        ``third_party_user``; ``"staff"`` selects the agent profile from
        ``qywx_employee``.
    :return: whatever ``mysql_client.select`` returns for the query
        (presumably a sequence of dict rows, given ``DictCursor`` — confirm
        against ``MySQLManager``).
    :raises ValueError: if ``user_type`` is neither ``"user"`` nor ``"staff"``.
    """
    if user_type == "user":
        query = """
                select iconurl as 'avatar', profile_data_v1 as 'profile' 
                from third_party_user where third_party_user_id = %s; 
            """
    elif user_type == "staff":
        query = """
                select agent_profile as 'profile'
                from qywx_employee where third_party_user_id = %s;
            """
    else:
        raise ValueError("user_type must be 'user' or 'staff'")

    # The id is passed as a bound parameter, never interpolated into the SQL.
    params = (user_id_,)
    return mysql_client.select(query, cursor_type=DictCursor, args=params)


def evaluate_conversation_quality_task(dialogue_history, user_profile_, agent_profile):
    """
    Build the LLM prompt that scores an agent's conversation quality.

    The prompt embeds both personas and the dialogue history, lists a
    100-point rubric (conversation ability 70 points, role consistency
    30 points) and shows the expected shape of the JSON answer.

    :param dialogue_history: conversation between user and agent; rendered
        into the prompt with ``str()`` formatting.
    :param user_profile_: user persona; rendered into the prompt as-is.
    :param agent_profile: agent persona; rendered into the prompt as-is.
    :return: the prompt text.
    """
    # Example of the answer structure; it is interpolated into the prompt as
    # a Python dict repr. The scores and reasons are placeholders only.
    output_format = {
        "1.1": {
            "score": 5,
            "reason": ""
        },
        "1.2": {
            "score": 8,
            "reason": "reason"
        },
        "1.3": {
            "score": 10,
            "reason": "reason"
        },
        "1.4": {
            "score": 10,
            "reason": "reason"
        },
        "1.5": {
            "score": 10,
            "reason": "reason"
        },
        "1.6": {
            "score": 10,
            "reason": "reason"
        },
        "2.1": {
            "score": 9,
            "reason": "reason"
        },
        "2.2": {
            "score": 10,
            "reason": "reason"
        },
        "2.3": {
            "score": 10,
            "reason": "reason"
        },
        "total_score": "total_score",
        "improvement_suggestions": "suggestions",
    }
    # Section 1 is worth 70 points: its sub-items sum to 10+10+10+10+20+10,
    # and together with section 2 (30) that gives the stated 100-point total.
    prompt_ = f"""
        你是一名优秀的 agent 评估员，请根据以下场景和输入，对该 agent 的回复能力进行评估，用分数量化
        场景：
            智能体对话场景， 智能体（agent）和用户（user）进行对话聊天
        输入：
            agent 的人设：agent_profile: {agent_profile}
            用户的人设： user_profile: {user_profile_}
            对话历史：dialogue_history: {dialogue_history}
        评估标准， 满分为 100分，拆分到以下每一个小项，每一个小项的得分表示该小项的能力，60% 的分表示及格，80% 的分表示优秀：
            1. 对话能力（70分）
                1.1 语言是否流畅（10分）
                1.2 上下文是否连贯，语义是否一致（10分）
                1.3 agent 是否感知用户结束聊天的意图并且适当结束聊天（10分）
                1.4 agent 回复消息的时间间隔是否合理，符合真人对话规律 （10分）
                1.5 agent 回复的消息是否具有高情商，互动能力是否好，能否和用户共情，提升用户的情感体验 （20分）
                1.6 agent 回复的消息是否解决了用户提出的问题 （10分）
            2. 角色一致性（30分）
                2.1 agent 语言风格是否符合agent人设（10分）
                2.2 agent 语言风格是否适合用户人设（10分）
                2.3 agent 回复内容不要超越用户的认知上限（10分）
        输出：
            输出为 json 格式，输出格式规范 {output_format}
    """
    return prompt_


def evaluate_push_agent_prompt(dialogue_history, push_message, user_profile_, agent_profile):
    """
    Build the LLM prompt that scores a proactive push message.

    The prompt embeds both personas, the dialogue history and the push
    message, lists a 100-point rubric (relevance to the user 30, relevance
    to the agent persona 20, message quality 50) and shows a filled-in
    example of the expected JSON answer.

    :param dialogue_history: earlier conversation between user and agent.
    :param push_message: the message the agent wants to send to the user.
    :param user_profile_: user persona.
    :param agent_profile: agent persona.
    :return: the prompt text; every argument is rendered with ``str()``
        formatting.
    """
    # (rubric item, example score, example reason) — kept in rubric order so
    # the rendered example lists the items in the same order as the criteria.
    example_rows = (
        ("1.1", 5, "push_message尝试联系用户的头像，但用户兴趣未明确提及戏曲"),
        ("1.2", 8, "语言风格轻松友好，适合大多数用户，但未完全匹配用户特定风格"),
        ("1.3", 10, "信息未超出用户认知范围"),
        ("2.1", 9, "语言风格符合agent人设，友好且亲切"),
        ("2.2", 10, "信息未超出agent人设的认知范围"),
        ("3.1", 15, "push_message有潜力勾起用户兴趣，但未直接关联用户已知兴趣"),
        ("3.2", 10, "信息真实"),
        ("3.3", 12, "表现出一定的拟人化和情商，但共情程度可进一步提升"),
    )
    example_answer = {
        item: {"score": points, "reason": why}
        for item, points, why in example_rows
    }
    example_answer["total_score"] = 79
    example_answer["improvement_suggestions"] = (
        "建议更深入地挖掘和利用用户已知的兴趣爱好来定制push_message，以增强相关性和用户参与度。同时，可以尝试更多共情的表达方式，以提升用户的情感体验。"
    )

    return f"""
        你是一名优秀的 agent 评估员，请根据以下场景和输入，对该 agent 的能力进行评估，用分数量化
        场景：
            智能体对话场景， 智能体（agent）向用户发起对话
            agent 需要通过分析 user 和 agent 直接的历史对话，以及 user 和 agent 的人设信息，向用户发送一条消息（push_message）
        输入：
            agent 的人设：agent_profile: {agent_profile}
            用户的人设： user_profile: {user_profile_}
            对话历史：dialogue_history: {dialogue_history}
            agent 的唤起对话：push_message: {push_message}
        评估标准， 满分为 100分，拆分到以下每一个小项，每一个小项的得分表示该小项的能力，60% 的分表示及格，80% 的分表示优秀：
            1. push_message 的内容 和 user_profile的相关性（30分）
                1.1 push_message 是否迎合用户的兴趣爱好 (满分 10分）
                1.2 push_message 的语言风格是否适合用户语言风格 (满分 10分）
                1.3 push_message 的信息是否超出用户的认知范围 (满分 10分）
            2. push_message 和 agent_profile 的相关性（20分）
                2.1 push_message 的语言风格是否符合 agent 人设(满分 10分）
                2.2 push_message 的信息是否超出 agent人设的认知范围(满分 10分）
            3. push_message 质量量化 （50分）
                3.1 push_message 是否能勾起用户的兴趣，驱动用户聊天激情 (满分 25分）
                3.2 push_message 的信息是否真实 (满分 10分）
                3.3 push_message 是否具有拟人化，高情商，与用户共情，提升用户的情感体验(满分 15分）
        输出：
            输出为 json 格式，输出格式规范 {example_answer}
    """


def evaluate_reply_agent(dialogue_history, reply_message, user_profile_, agent_profile):
    """
    Placeholder for the reply-agent evaluation; it has no implementation yet.

    :param dialogue_history: unused.
    :param reply_message: unused.
    :param user_profile_: unused.
    :param agent_profile: unused.
    :return: always ``None``.
    """
    return None


# Connection settings for the MySQL database queried by get_profile_info.
# Every value can be overridden through an environment variable; the literals
# are the defaults this script has always used, so existing runs are
# unaffected.
# NOTE(review): the default password is a credential committed to source
# control — rotate it and supply AI_AGENT_MYSQL_PASSWORD instead.
config = {
    'host': os.environ.get('AI_AGENT_MYSQL_HOST', 'rm-bp13g3ra2f59q49xs.mysql.rds.aliyuncs.com'),
    'port': int(os.environ.get('AI_AGENT_MYSQL_PORT', 3306)),
    'user': os.environ.get('AI_AGENT_MYSQL_USER', 'wqsd'),
    'password': os.environ.get('AI_AGENT_MYSQL_PASSWORD', 'wqsd@2025'),
    'database': os.environ.get('AI_AGENT_MYSQL_DATABASE', 'ai_agent'),
    'charset': 'utf8mb4'
}
# Created at import time because get_profile_info reads this module global.
mysql_client = MySQLManager(config)

# Script entry point: sample dialogues from the data set, let the push agent
# generate a message for each, have DeepSeek score it, and dump all results
# to push_message_evaluation_result.json.
if __name__ == '__main__':
    import pqai_agent.logging_service
    pqai_agent.logging_service.setup_root_logger()
    logger = logging.getLogger(__name__)

    with open("scripts/dialogue_data_set.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # Evaluate at most 100 randomly chosen dialogues. random.sample raises
    # ValueError when asked for more items than the population holds, so the
    # sample size is capped for small data sets.
    dialogues = random.sample(data, min(100, len(data)))
    F = []
    for index, sub_dialogues in enumerate(tqdm(dialogues)):
        try:
            # User side: load the stored profile and attach the avatar.
            user_id = sub_dialogues['user_id']
            user_profile_response = get_profile_info(user_id, "user")
            user_profile, avatar = json.loads(user_profile_response[0]['profile']), user_profile_response[0]['avatar']

            user_profile['avatar'] = avatar
            user_profile['current_datetime'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            # Staff side: the agent persona travels inside the user context.
            staff_id = sub_dialogues['staff_id']
            staff_profile_response = get_profile_info(staff_id, "staff")
            staff_profile = json.loads(staff_profile_response[0]['profile'])

            user_profile['formatted_staff_profile'] = staff_profile

            push_agent = MessagePushAgent()
            # reply_agent = MessageReplyAgent()

            # Generate the push message, then have the LLM grade it.
            message = sub_dialogues['dialogue_history']
            agent_message = push_agent.generate_message(
                context=user_profile,
                dialogue_history=message
            )
            prompt = evaluate_push_agent_prompt(message, agent_message, user_profile, staff_profile)
            # prompt = evaluate_conversation_quality_task(message, user_profile, staff_profile)
            response = fetch_deepseek_completion(prompt, output_type='json')
            obj = {
                "user_profile": user_profile,
                "agent_profile": staff_profile,
                "dialogue_history": message,
                "push_message": agent_message,
                "push_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "evaluation_result": response
            }
            F.append(obj)
        except Exception:
            # Best-effort batch: one bad sample (missing profile row, invalid
            # JSON, API failure) must not abort the run, but the failure is
            # logged with its traceback instead of vanishing silently.
            # KeyboardInterrupt/SystemExit are not caught, so Ctrl-C still
            # stops the script.
            logger.exception("skipping sampled dialogue #%d: evaluation failed", index)
            continue

    with open("push_message_evaluation_result.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(F, indent=4, ensure_ascii=False))