|  | @@ -0,0 +1,271 @@
 | 
											
												
													
														|  | 
import datetime
import json
import os
import random

from openai import OpenAI
from pymysql.cursors import DictCursor
from tqdm import tqdm

from pqai_agent.agents.message_push_agent import MessagePushAgent
from pqai_agent.agents.message_reply_agent import MessageReplyAgent
from pqai_agent.database import MySQLManager
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def fetch_deepseek_completion(prompt, output_type='text'):
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    deep_seek方法
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    client = OpenAI(
 | 
											
												
													
														|  | 
 |  | +        api_key='sk-cfd2df92c8864ab999d66a615ee812c5',
 | 
											
												
													
														|  | 
 |  | +        base_url="https://api.deepseek.com"
 | 
											
												
													
														|  | 
 |  | +    )
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    # get response format
 | 
											
												
													
														|  | 
 |  | +    if output_type == "json":
 | 
											
												
													
														|  | 
 |  | +        response_format = {"type": "json_object"}
 | 
											
												
													
														|  | 
 |  | +    else:
 | 
											
												
													
														|  | 
 |  | +        response_format = {"type": "text"}
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    chat_completion = client.chat.completions.create(
 | 
											
												
													
														|  | 
 |  | +        messages=[
 | 
											
												
													
														|  | 
 |  | +            {
 | 
											
												
													
														|  | 
 |  | +                "role": "user",
 | 
											
												
													
														|  | 
 |  | +                "content": prompt,
 | 
											
												
													
														|  | 
 |  | +            }
 | 
											
												
													
														|  | 
 |  | +        ],
 | 
											
												
													
														|  | 
 |  | +        model="deepseek-chat",
 | 
											
												
													
														|  | 
 |  | +        response_format=response_format,
 | 
											
												
													
														|  | 
 |  | +    )
 | 
											
												
													
														|  | 
 |  | +    response = chat_completion.choices[0].message.content
 | 
											
												
													
														|  | 
 |  | +    if output_type == "json":
 | 
											
												
													
														|  | 
 |  | +        response_json = json.loads(response)
 | 
											
												
													
														|  | 
 |  | +        return response_json
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    return response
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def get_profile_info(user_id_, user_type):
 | 
											
												
													
														|  | 
 |  | +    match user_type:
 | 
											
												
													
														|  | 
 |  | +        case "user":
 | 
											
												
													
														|  | 
 |  | +            sql = f"""
 | 
											
												
													
														|  | 
 |  | +                select iconurl as 'avatar', profile_data_v1 as 'profile' 
 | 
											
												
													
														|  | 
 |  | +                from third_party_user where third_party_user_id = %s; 
 | 
											
												
													
														|  | 
 |  | +            """
 | 
											
												
													
														|  | 
 |  | +        case "staff":
 | 
											
												
													
														|  | 
 |  | +            sql = f"""
 | 
											
												
													
														|  | 
 |  | +                select agent_profile as 'profile'
 | 
											
												
													
														|  | 
 |  | +                from qywx_employee where third_party_user_id = %s;
 | 
											
												
													
														|  | 
 |  | +            """
 | 
											
												
													
														|  | 
 |  | +        case _:
 | 
											
												
													
														|  | 
 |  | +            raise ValueError("user_type must be 'user' or 'staff'")
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    return mysql_client.select(sql, cursor_type=DictCursor, args=(user_id_,))
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def evaluate_conversation_quality_task(dialogue_history, user_profile_, agent_profile):
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    :param dialogue_history:
 | 
											
												
													
														|  | 
 |  | +    :param user_profile_:
 | 
											
												
													
														|  | 
 |  | +    :param agent_profile:
 | 
											
												
													
														|  | 
 |  | +    :return:
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    output_format = {
 | 
											
												
													
														|  | 
 |  | +        "1.1": {
 | 
											
												
													
														|  | 
 |  | +            "score": 5,
 | 
											
												
													
														|  | 
 |  | +            "reason": ""
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.2": {
 | 
											
												
													
														|  | 
 |  | +            "score": 8,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.3": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.4": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.5": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.6": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "2.1": {
 | 
											
												
													
														|  | 
 |  | +            "score": 9,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "2.2": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "2.3": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "reason"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "total_score": "total_score",
 | 
											
												
													
														|  | 
 |  | +        "improvement_suggestions": "suggestions",
 | 
											
												
													
														|  | 
 |  | +    }
 | 
											
												
													
														|  | 
 |  | +    prompt_ = f"""
 | 
											
												
													
														|  | 
 |  | +        你是一名优秀的 agent 评估员,请根据以下场景和输入,对该 agent 的回复能力进行评估,用分数量化
 | 
											
												
													
														|  | 
 |  | +        场景:
 | 
											
												
													
														|  | 
 |  | +            智能体对话场景, 智能体(agent)和用户(user)进行对话聊天
 | 
											
												
													
														|  | 
 |  | +        输入:
 | 
											
												
													
														|  | 
 |  | +            agent 的人设:agent_profile: {agent_profile}
 | 
											
												
													
														|  | 
 |  | +            用户的人设: user_profile: {user_profile_}
 | 
											
												
													
														|  | 
 |  | +            对话历史:dialogue_history: {dialogue_history}
 | 
											
												
													
														|  | 
 |  | +        评估标准, 满分为 100分,拆分到以下每一个小项,每一个小项的得分表示该小项的能力,60% 的分表示及格,80% 的分表示优秀:
 | 
											
												
													
														|  | 
 |  | +            1. 对话能力(30分)
 | 
											
												
													
														|  | 
 |  | +                1.1 语言是否流畅(10分)
 | 
											
												
													
														|  | 
 |  | +                1.2 上下文是否连贯,语义是否一致(10分)
 | 
											
												
													
														|  | 
 |  | +                1.3 agent 是否感知用户结束聊天的意图并且适当结束聊天(10分)
 | 
											
												
													
														|  | 
 |  | +                1.4 agent 回复消息的时间间隔是否合理,符合真人对话规律 (10分)
 | 
											
												
													
														|  | 
 |  | +                1.5 agent 回复的消息是否具有高情商,互动能力是否好,能否和用户共情,提升用户的情感体验 (20分)
 | 
											
												
													
														|  | 
 |  | +                1.6 agent 回复的消息是否解决了用户提出的问题 (10分)
 | 
											
												
													
														|  | 
 |  | +            2. 角色一致性(30分)
 | 
											
												
													
														|  | 
 |  | +                2.1 agent 语言风格是否符合agent人设(10分)
 | 
											
												
													
														|  | 
 |  | +                2.2 agent 语言风格是否适合用户人设(10分)
 | 
											
												
													
														|  | 
 |  | +                2.3 agent 回复内容不要超越用户的认知上限(10分)
 | 
											
												
													
														|  | 
 |  | +        输出:
 | 
											
												
													
														|  | 
 |  | +            输出为 json 格式,输出格式规范 {output_format}
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    return prompt_
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def evaluate_push_agent_prompt(dialogue_history, push_message, user_profile_, agent_profile):
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    :param dialogue_history:
 | 
											
												
													
														|  | 
 |  | +    :param push_message:
 | 
											
												
													
														|  | 
 |  | +    :param user_profile_:
 | 
											
												
													
														|  | 
 |  | +    :param agent_profile:
 | 
											
												
													
														|  | 
 |  | +    :return:
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    output_format = {
 | 
											
												
													
														|  | 
 |  | +        "1.1": {
 | 
											
												
													
														|  | 
 |  | +            "score": 5,
 | 
											
												
													
														|  | 
 |  | +            "reason": "push_message尝试联系用户的头像,但用户兴趣未明确提及戏曲"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.2": {
 | 
											
												
													
														|  | 
 |  | +            "score": 8,
 | 
											
												
													
														|  | 
 |  | +            "reason": "语言风格轻松友好,适合大多数用户,但未完全匹配用户特定风格"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "1.3": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "信息未超出用户认知范围"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "2.1": {
 | 
											
												
													
														|  | 
 |  | +            "score": 9,
 | 
											
												
													
														|  | 
 |  | +            "reason": "语言风格符合agent人设,友好且亲切"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "2.2": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "信息未超出agent人设的认知范围"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "3.1": {
 | 
											
												
													
														|  | 
 |  | +            "score": 15,
 | 
											
												
													
														|  | 
 |  | +            "reason": "push_message有潜力勾起用户兴趣,但未直接关联用户已知兴趣"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "3.2": {
 | 
											
												
													
														|  | 
 |  | +            "score": 10,
 | 
											
												
													
														|  | 
 |  | +            "reason": "信息真实"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "3.3": {
 | 
											
												
													
														|  | 
 |  | +            "score": 12,
 | 
											
												
													
														|  | 
 |  | +            "reason": "表现出一定的拟人化和情商,但共情程度可进一步提升"
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        "total_score": 79,
 | 
											
												
													
														|  | 
 |  | +        "improvement_suggestions": "建议更深入地挖掘和利用用户已知的兴趣爱好来定制push_message,以增强相关性和用户参与度。同时,可以尝试更多共情的表达方式,以提升用户的情感体验。"
 | 
											
												
													
														|  | 
 |  | +    }
 | 
											
												
													
														|  | 
 |  | +    prompt_ = f"""
 | 
											
												
													
														|  | 
 |  | +        你是一名优秀的 agent 评估员,请根据以下场景和输入,对该 agent 的能力进行评估,用分数量化
 | 
											
												
													
														|  | 
 |  | +        场景:
 | 
											
												
													
														|  | 
 |  | +            智能体对话场景, 智能体(agent)向用户发起对话
 | 
											
												
													
														|  | 
 |  | +            agent 需要通过分析 user 和 agent 直接的历史对话,以及 user 和 agent 的人设信息,向用户发送一条消息(push_message)
 | 
											
												
													
														|  | 
 |  | +        输入:
 | 
											
												
													
														|  | 
 |  | +            agent 的人设:agent_profile: {agent_profile}
 | 
											
												
													
														|  | 
 |  | +            用户的人设: user_profile: {user_profile_}
 | 
											
												
													
														|  | 
 |  | +            对话历史:dialogue_history: {dialogue_history}
 | 
											
												
													
														|  | 
 |  | +            agent 的唤起对话:push_message: {push_message}
 | 
											
												
													
														|  | 
 |  | +        评估标准, 满分为 100分,拆分到以下每一个小项,每一个小项的得分表示该小项的能力,60% 的分表示及格,80% 的分表示优秀:
 | 
											
												
													
														|  | 
 |  | +            1. push_message 的内容 和 user_profile的相关性(30分)
 | 
											
												
													
														|  | 
 |  | +                1.1 push_message 是否迎合用户的兴趣爱好 (满分 10分)
 | 
											
												
													
														|  | 
 |  | +                1.2 push_message 的语言风格是否适合用户语言风格 (满分 10分)
 | 
											
												
													
														|  | 
 |  | +                1.3 push_message 的信息是否超出用户的认知范围 (满分 10分)
 | 
											
												
													
														|  | 
 |  | +            2. push_message 和 agent_profile 的相关性(20分)
 | 
											
												
													
														|  | 
 |  | +                2.1 push_message 的语言风格是否符合 agent 人设(满分 10分)
 | 
											
												
													
														|  | 
 |  | +                2.2 push_message 的信息是否超出 agent人设的认知范围(满分 10分)
 | 
											
												
													
														|  | 
 |  | +            3. push_message 质量量化 (50分)
 | 
											
												
													
														|  | 
 |  | +                3.1 push_message 是否能勾起用户的兴趣,驱动用户聊天激情 (满分 25分)
 | 
											
												
													
														|  | 
 |  | +                3.2 push_message 的信息是否真实 (满分 10分)
 | 
											
												
													
														|  | 
 |  | +                3.3 push_message 是否具有拟人化,高情商,与用户共情,提升用户的情感体验(满分 15分)
 | 
											
												
													
														|  | 
 |  | +        输出:
 | 
											
												
													
														|  | 
 |  | +            输出为 json 格式,输出格式规范 {output_format}
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    return prompt_
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def evaluate_reply_agent(dialogue_history, reply_message, user_profile_, agent_profile):
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    :param dialogue_history:
 | 
											
												
													
														|  | 
 |  | +    :param reply_message:
 | 
											
												
													
														|  | 
 |  | +    :param user_profile_:
 | 
											
												
													
														|  | 
 |  | +    :param agent_profile:
 | 
											
												
													
														|  | 
 |  | +    :return:
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    return
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +config = {
 | 
											
												
													
														|  | 
 |  | +    'host': 'rm-bp13g3ra2f59q49xs.mysql.rds.aliyuncs.com',
 | 
											
												
													
														|  | 
 |  | +    'port': 3306,
 | 
											
												
													
														|  | 
 |  | +    'user': 'wqsd',
 | 
											
												
													
														|  | 
 |  | +    'password': 'wqsd@2025',
 | 
											
												
													
														|  | 
 |  | +    'database': 'ai_agent',
 | 
											
												
													
														|  | 
 |  | +    'charset': 'utf8mb4'
 | 
											
												
													
														|  | 
 |  | +}
 | 
											
												
													
														|  | 
 |  | +mysql_client = MySQLManager(config)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +if __name__ == '__main__':
 | 
											
												
													
														|  | 
 |  | +    with open("scripts/inner_dialogues.json", "r", encoding="utf-8") as f:
 | 
											
												
													
														|  | 
 |  | +        data = json.load(f)
 | 
											
												
													
														|  | 
 |  | +    dialogues = random.sample(data[10: ], 5)
 | 
											
												
													
														|  | 
 |  | +    F = []
 | 
											
												
													
														|  | 
 |  | +    for sub_dialogues in tqdm(dialogues):
 | 
											
												
													
														|  | 
 |  | +        user_id = sub_dialogues['user_id']
 | 
											
												
													
														|  | 
 |  | +        user_profile_response = get_profile_info(user_id, "user")
 | 
											
												
													
														|  | 
 |  | +        user_profile, avatar = json.loads(user_profile_response[0]['profile']), user_profile_response[0]['avatar']
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        user_profile['avatar'] = avatar
 | 
											
												
													
														|  | 
 |  | +        user_profile['current_datetime'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        staff_id = sub_dialogues['staff_id']
 | 
											
												
													
														|  | 
 |  | +        staff_profile_response = get_profile_info(staff_id, "staff")
 | 
											
												
													
														|  | 
 |  | +        staff_profile = json.loads(staff_profile_response[0]['profile'])
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        user_profile['formatted_staff_profile'] = staff_profile
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        push_agent = MessagePushAgent()
 | 
											
												
													
														|  | 
 |  | +        # reply_agent = MessageReplyAgent()
 | 
											
												
													
														|  | 
 |  | +        for message in sub_dialogues['dialogues']:
 | 
											
												
													
														|  | 
 |  | +            agent_message = push_agent.generate_message(
 | 
											
												
													
														|  | 
 |  | +                context=user_profile,
 | 
											
												
													
														|  | 
 |  | +                dialogue_history=message
 | 
											
												
													
														|  | 
 |  | +            )
 | 
											
												
													
														|  | 
 |  | +            prompt = evaluate_push_agent_prompt(message, agent_message, user_profile, staff_profile)
 | 
											
												
													
														|  | 
 |  | +            response = fetch_deepseek_completion(prompt, output_type='json')
 | 
											
												
													
														|  | 
 |  | +            obj = {
 | 
											
												
													
														|  | 
 |  | +                "user_profile": user_profile,
 | 
											
												
													
														|  | 
 |  | +                "agent_profile": staff_profile,
 | 
											
												
													
														|  | 
 |  | +                "dialogue_history": message,
 | 
											
												
													
														|  | 
 |  | +                "push_message": agent_message,
 | 
											
												
													
														|  | 
 |  | +                "push_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
 | 
											
												
													
														|  | 
 |  | +                "evaluation_result": response
 | 
											
												
													
														|  | 
 |  | +            }
 | 
											
												
													
														|  | 
 |  | +            F.append(obj)
 | 
											
												
													
														|  | 
 |  | +    with open("scripts/push_message_evaluation_result.json", "w", encoding="utf-8") as f:
 | 
											
												
													
														|  | 
 |  | +        f.write(json.dumps(F, indent=4, ensure_ascii=False))
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 |