# Server / AgentCoreService

import json
import os

from openai import OpenAI

def fetch_deepseek_completion(prompt, output_type='text'):
    """Send *prompt* to the DeepSeek chat API and return the reply.

    The API key is read from the ``DEEPSEEK_API_KEY`` environment variable so
    that no credential is stored in the source tree.

    Args:
        prompt: Text sent as the single ``user`` message of the conversation.
        output_type: ``"json"`` requests a JSON-object response and parses the
            reply with ``json.loads``; any other value requests plain text.

    Returns:
        The parsed JSON value when ``output_type == "json"``, otherwise the
        message content exactly as returned by the API.

    Raises:
        RuntimeError: If ``DEEPSEEK_API_KEY`` is unset or empty.
        json.JSONDecodeError: If JSON output was requested but the reply is
            not valid JSON.
    """
    # Fail before any network activity when the credential is missing.
    api_key = os.environ.get("DEEPSEEK_API_KEY")
    if not api_key:
        raise RuntimeError(
            "DEEPSEEK_API_KEY environment variable is not set; "
            "export it before calling fetch_deepseek_completion()"
        )

    wants_json = output_type == "json"
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.deepseek.com",
    )

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="deepseek-chat",
        response_format={"type": "json_object" if wants_json else "text"},
    )
    response = chat_completion.choices[0].message.content
    if wants_json:
        return json.loads(response)

    return response


class PushAgentEvaluator:
    """Grade one agent push message against several LLM-scored rubrics.

    Every ``evaluate_*`` method builds a Chinese-language prompt from the
    stored context, sends it through ``fetch_deepseek_completion`` in JSON
    mode and returns the parsed reply unchanged. Prompt construction lives in
    the matching ``_*_prompt`` helper so it can be inspected without a
    network call.

    ``user_profile`` is subscripted with the keys ``age``, ``gender``,
    ``interests``, ``health_conditions``, ``medications``, ``region`` and
    ``family_members``; ``agent_profile`` with ``occupation``, ``age``,
    ``gender``, ``region``, ``education`` and ``work_experience``. A missing
    key raises ``KeyError`` when the corresponding prompt is built.
    """

    # (prompt label, user_profile key) pairs, in the order they are listed.
    _USER_FEATURE_FIELDS = (
        ("年龄", "age"),
        ("性别", "gender"),
        ("兴趣", "interests"),
        ("健康状态", "health_conditions"),
        ("用药状态", "medications"),
        ("居住地址", "region"),
        ("家庭成员", "family_members"),
    )

    def __init__(self, dialogue_history, push_message, push_time, user_profile, agent_profile):
        """Store the evaluation context.

        Args:
            dialogue_history: Earlier user/agent conversation; interpolated
                into prompts as-is.
            push_message: The agent message being graded.
            push_time: Send time of the message; only used by the
                execution-quality rubric.
            user_profile: Mapping describing the user (see class docstring).
            agent_profile: Mapping describing the agent persona.
        """
        self.dialogue_history = dialogue_history
        self.push_message = push_message
        self.user_profile = user_profile
        self.agent_profile = agent_profile
        self.push_time = push_time

    @staticmethod
    def _json_example(example):
        """Serialise *example* as real JSON for embedding in a prompt.

        Interpolating the dict directly would emit Python ``repr`` syntax
        (single-quoted strings), which is not JSON even though the prompt
        presents it as a JSON sample.
        """
        return json.dumps(example, ensure_ascii=False)

    def _user_features(self):
        """Return the indented user-profile lines shared by several prompts."""
        profile = self.user_profile
        return "\n".join(
            f"                {label}: {profile[key]},"
            for label, key in self._USER_FEATURE_FIELDS
        )

    @staticmethod
    def _grade(prompt):
        """Send *prompt* to the model in JSON mode and return the parsed reply."""
        return fetch_deepseek_completion(prompt=prompt, output_type='json')

    def _user_match_prompt(self):
        """Build the user-match prompt (three items, 8 points each)."""
        # Example scores stay within the 8-point cap stated in the rubric, and
        # the rubric labels match the example keys the model is asked to emit.
        example = {
            "语言风格": {
                "score": 8,
                "reason": "语言风格符合用户特征，年龄、性别、地域等特征都匹配"
            },
            "信息边界": {
                "score": 8,
                "reason": "信息范围符合用户认知范围"
            },
            "特征匹配": {
                "score": 8,
                "reason": "特征匹配度高，user_profile中有兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，在 push_message需要体现出来"
            }
        }
        return f"""
            通过分析用户的以下特征
{self._user_features()}
            和 agent 发送的消息: {self.push_message}, 判断该消息是否和该用户的特征匹配
            包括一下三点，每一点的满分为 8 分：
                语言风格：是否符合用户的年龄、性别、地域等特征；
                信息边界：是否超出用户认知范围；
                特征匹配：如果 user_profile中有兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，在 push_message需要体现出来
            输出的格式为 JSON，示例如：{self._json_example(example)}
        """

    def _agent_consistency_prompt(self):
        """Build the persona-consistency prompt (two items, 8 points each)."""
        # The example reasons describe the agent, since this rubric grades the
        # message against the agent persona rather than the user.
        example = {
            "语言风格": {
                "score": 8,
                "reason": "语言风格符合 agent 特征，年龄、性别、地域等特征都匹配"
            },
            "消息边界": {
                "score": 8,
                "reason": "信息范围符合 agent 认知范围"
            },
        }
        agent = self.agent_profile
        return f"""
            通过分析 agent 的一下特征：
                职业：{agent['occupation']},
                年龄：{agent['age']},
                性别：{agent['gender']},
                地址：{agent['region']},
                教育背景：{agent['education']},
                工作经验：{agent['work_experience']}
            和 agent 发送的消息: {self.push_message}, 判断该消息是否和该 agent 的特征匹配
            包括一下 2 点，每一点的满分为 8 分：
                语言风格：是否符合 agent 的年龄、性别、地域等特征；需要从不同性别，不同职业，不同年龄， 不同地域的人的说话风格去分析
                消息边界：是否超出 agent 认知范围；
            输出的格式为 JSON，示例如：{self._json_example(example)}
        """

    def _interest_arousal_prompt(self):
        """Build the interest-arousal prompt (two items, 15 points each)."""
        example = {
            "好奇设计": {
                "score": 15,
                "reason": "包含开放式问题，包括对用户认知边界的探索，用户兴趣爱好的联想等方面"
            },
            "利益设计": {
                "score": 15,
                "reason": "如果用户在历史对话中提到了某种需求，新的唤起消息是否有为用户解决需求的趋势"
            }
        }
        return f"""
            通过分析用户的以下特征
{self._user_features()}
            和 agent 发送的消息: {self.push_message}, 以及用户和 agent 的历史对话: {self.dialogue_history},
            判断该消息是否能唤起用户的兴趣，驱动用户的聊天激情，主要从以下几2个方面考虑，每一个要点满分 15分
                好奇设计：是否包含开放式问题，包括对用户认知边界的探索，用户兴趣爱好的联想等方面
                利益设计：如果用户在历史对话中提到了某种需求，新的唤起消息是否有为用户解决需求的趋势；
                        如果没提出，可以从用户特征中联想出用户的需求，比如用户的兴趣爱好，健康状态，用药状态，居住地址，家庭成员等特征，
                        在 push_message需要体现出来
            输出的格式为 JSON，示例如：{self._json_example(example)}
        """

    def _execution_quality_prompt(self):
        """Build the execution-quality prompt (three items, 5 points each)."""
        example = {
            "信息保真": {
                "score": 5,
                "reason": "信息真实"
            },
            "政策合规": {
                "score": 5,
                "reason": "发送消息是否符合现有政策要求，是否违反相关规则"
            },
            "语言拟人": {
                "score": 5,
                "reason": "表现出一定的拟人化和情商"
            }
        }
        return f"""
            通过分析 agent 发送的消息：{self.push_message},
            信息保真（满分 5 分）:
                拆分消息重点各个事件点，联网搜索，判断事件是否为真实事件，比如说节日祝福需要和发送时间{self.push_time}对应， 天气需要和地区相关
                不能在早上发晚上好之类等等
            政策合规（满分 5 分）:
                并且判断发送的消息是否存在违反规则的现象。
            语言拟人:（满分 5 分）:
                判断发送的消息是否存在机械化，同质化的现象，比如重复发相同或者相似的消息，比如过于机械的回复等等
            输出的格式为 JSON，示例如：{self._json_example(example)}
        """

    def _emotional_intelligence_prompt(self):
        """Build the emotional-intelligence prompt (single item, 15 points)."""
        example = {
            "共情深度": {
                "score": 10,
                "reason": "识别用户情感状态，提供相应的回应"
            },
        }
        return f"""
             通过分析用户的以下特征
{self._user_features()}
            和 agent 和用户的对话：{self.dialogue_history} 挖掘出用户的情感需求
            并且 agent 发送的消息: {self.push_message}, 判断该message是否能和用户共情，提升用户的情感体验
            满分 15分
            输出格式为 JSON，示例如：{self._json_example(example)}
        """

    def evaluate_user_match(self):
        """Rate how well the push message fits the user (24 points).

        Returns:
            The model's parsed JSON reply, requested with the keys
            ``语言风格``, ``信息边界`` and ``特征匹配``.
        """
        return self._grade(self._user_match_prompt())

    def evaluate_agent_consistency(self):
        """Rate how well the push message fits the agent persona (16 points).

        Returns:
            The model's parsed JSON reply, requested with the keys
            ``语言风格`` and ``消息边界``.
        """
        return self._grade(self._agent_consistency_prompt())

    def evaluate_interest_arousal(self):
        """Rate how strongly the push message invites a reply (30 points).

        Returns:
            The model's parsed JSON reply, requested with the keys
            ``好奇设计`` and ``利益设计``.
        """
        return self._grade(self._interest_arousal_prompt())

    def evaluate_execution_quality(self):
        """Rate factual fidelity, compliance and naturalness (15 points).

        Returns:
            The model's parsed JSON reply, requested with the keys
            ``信息保真``, ``政策合规`` and ``语言拟人``.
        """
        return self._grade(self._execution_quality_prompt())

    def evaluate_emotional_intelligence(self):
        """Rate how well the push message empathises with the user (15 points).

        Returns:
            The model's parsed JSON reply, requested with the key ``共情深度``.
        """
        return self._grade(self._emotional_intelligence_prompt())


def evaluate_push_agent(
        dialogue_history,
        push_message,
        push_time,
        user_profile,
        agent_profile
):
    """Run every rubric of ``PushAgentEvaluator`` and assemble one report.

    Args:
        dialogue_history: Earlier user/agent conversation.
        push_message: The agent message being graded.
        push_time: Send time of the message.
        user_profile: Mapping describing the user.
        agent_profile: Mapping describing the agent persona.

    Returns:
        A two-section dict: ``基础能力`` holds the user-match and
        persona-consistency results, ``任务效能`` holds the interest-arousal,
        execution-quality and emotional-intelligence results. Each value is
        whatever the corresponding ``evaluate_*`` method returned.
    """
    engine = PushAgentEvaluator(
        dialogue_history=dialogue_history,
        push_message=push_message,
        push_time=push_time,
        user_profile=user_profile,
        agent_profile=agent_profile,
    )

    # Rubrics run one after another, in the order they appear in the report.
    user_match = engine.evaluate_user_match()
    persona_consistency = engine.evaluate_agent_consistency()
    interest_arousal = engine.evaluate_interest_arousal()
    execution_quality = engine.evaluate_execution_quality()
    emotional_intelligence = engine.evaluate_emotional_intelligence()

    basic_ability = {
        "用户匹配": user_match,
        "人设一致": persona_consistency,
    }
    task_effectiveness = {
        "兴趣激发": interest_arousal,
        "执行质量": execution_quality,
        "情感智能": emotional_intelligence,
    }
    return {
        "基础能力": basic_ability,
        "任务效能": task_effectiveness,
    }


if __name__ == '__main__':
    # Script entry point: load one evaluation case from dev.json, grade it and
    # print the report. The file must provide the five keys read below.
    #
    # Decode explicitly as UTF-8: the case data is Chinese-language JSON, and
    # the platform default encoding (e.g. on Windows) may not be UTF-8.
    with open("dev.json", encoding="utf-8") as f:
        data = json.load(f)

    evaluation_report = evaluate_push_agent(
        data['dialogue_history'],
        data['push_message'],
        data['push_time'],
        data['user_profile'],
        data['agent_profile']
    )
    # ensure_ascii=False keeps the Chinese keys and reasons readable.
    print(json.dumps(evaluation_report, indent=4, ensure_ascii=False))