#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data models for evaluation results.
"""

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional

# Composite score at or above which a note counts as a high match even when
# its match level is not "完全匹配".
_HIGH_MATCH_SCORE_THRESHOLD = 0.8


@dataclass
class NoteEvaluation:
    """Evaluation result for a single note."""

    note_id: str  # note ID
    channel_content_id: str  # channel content ID
    title: str = ""  # title
    body_text: str = ""  # body text

    # Layer 1: query relevance
    query_relevance: str = ""  # "相关" (relevant) or "不相关" (not relevant)
    query_relevance_reason: str = ""  # reason for the relevance verdict

    # Layer 2: feature match
    match_level: str = ""  # 完全匹配 / 相似匹配 / 弱相似 / 无匹配
    match_score: float = 0.0  # composite score
    match_reason: str = ""  # reason for the match verdict

    # Detailed score breakdown
    score_details: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        Values are passed through as-is; ``score_details`` is not copied.
        """
        return {
            "note_id": self.note_id,
            "channel_content_id": self.channel_content_id,
            "title": self.title,
            "body_text": self.body_text,
            "query_relevance": self.query_relevance,
            "query_relevance_reason": self.query_relevance_reason,
            "match_level": self.match_level,
            "match_score": self.match_score,
            "match_reason": self.match_reason,
            "score_details": self.score_details,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'NoteEvaluation':
        """Build an instance from a dictionary.

        Missing keys fall back to the field defaults (empty string for the
        two ID fields). Being a classmethod, it returns an instance of the
        class it is called on, so subclasses get their own type back.
        """
        return cls(
            note_id=data.get('note_id', ''),
            channel_content_id=data.get('channel_content_id', ''),
            title=data.get('title', ''),
            body_text=data.get('body_text', ''),
            query_relevance=data.get('query_relevance', ''),
            query_relevance_reason=data.get('query_relevance_reason', ''),
            match_level=data.get('match_level', ''),
            match_score=data.get('match_score', 0.0),
            match_reason=data.get('match_reason', ''),
            score_details=data.get('score_details'),
        )

    def is_relevant_to_query(self) -> bool:
        """Return True when the note was judged relevant ("相关") to the query."""
        return self.query_relevance == "相关"

    def is_high_match(self) -> bool:
        """Return True when the note is a high match.

        A note is a high match when its level is "完全匹配" or its composite
        score reaches the high-match threshold (0.8).
        """
        if self.match_level == "完全匹配":
            return True
        score = self.match_score
        # A score loaded from a JSON null is None; treat it as "no score"
        # rather than letting the comparison raise TypeError.
        return score is not None and score >= _HIGH_MATCH_SCORE_THRESHOLD


@dataclass
class QueryEvaluation:
    """Evaluation result for one query."""

    query_text: str  # query text
    topic_point_name: str  # topic point the query belongs to

    # Statistics
    total_notes: int = 0  # total number of notes
    filtered_count: int = 0  # notes filtered out by layer 1
    evaluated_count: int = 0  # notes evaluated by layer 2

    # Match-level distribution
    match_distribution: Dict[str, int] = field(default_factory=dict)

    # Detailed per-note evaluation results
    notes_evaluation: List[NoteEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary, serializing each note."""
        return {
            "query_text": self.query_text,
            "topic_point_name": self.topic_point_name,
            "total_notes": self.total_notes,
            "filtered_count": self.filtered_count,
            "evaluated_count": self.evaluated_count,
            "match_distribution": self.match_distribution,
            "notes_evaluation": [ne.to_dict() for ne in self.notes_evaluation],
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'QueryEvaluation':
        """Build an instance from a dictionary.

        Missing keys fall back to the field defaults. A missing or null
        ``notes_evaluation`` / ``match_distribution`` becomes an empty
        list / dict instead of raising or storing None.
        """
        notes_eval = [
            NoteEvaluation.from_dict(ne)
            for ne in data.get('notes_evaluation') or []
        ]
        return cls(
            query_text=data.get('query_text', ''),
            topic_point_name=data.get('topic_point_name', ''),
            total_notes=data.get('total_notes', 0),
            filtered_count=data.get('filtered_count', 0),
            evaluated_count=data.get('evaluated_count', 0),
            match_distribution=data.get('match_distribution') or {},
            notes_evaluation=notes_eval,
        )

    def get_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the notes for which ``is_high_match()`` is true."""
        return [ne for ne in self.notes_evaluation if ne.is_high_match()]

    def get_relevant_notes(self) -> List[NoteEvaluation]:
        """Return the notes for which ``is_relevant_to_query()`` is true."""
        return [ne for ne in self.notes_evaluation if ne.is_relevant_to_query()]


@dataclass
class PostEvaluation:
    """All evaluation results for one post."""

    post_id: str  # post ID
    query_evaluations: List[QueryEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        Besides the serialized queries, the result carries a ``statistics``
        entry computed at call time: number of queries, sum of their
        ``evaluated_count`` values, and number of high-match notes.
        """
        total_queries = len(self.query_evaluations)
        total_notes_evaluated = sum(
            qe.evaluated_count for qe in self.query_evaluations
        )
        total_high_match = sum(
            len(qe.get_high_match_notes()) for qe in self.query_evaluations
        )

        return {
            "post_id": self.post_id,
            "query_evaluations": [qe.to_dict() for qe in self.query_evaluations],
            "statistics": {
                "total_queries": total_queries,
                "total_notes_evaluated": total_notes_evaluated,
                "total_high_match_notes": total_high_match,
            },
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PostEvaluation':
        """Build an instance from a dictionary.

        Only ``post_id`` and ``query_evaluations`` are read; any
        ``statistics`` entry is ignored because it is derived data. A missing
        or null ``query_evaluations`` becomes an empty list.
        """
        qe_list = [
            QueryEvaluation.from_dict(qe)
            for qe in data.get('query_evaluations') or []
        ]
        return cls(
            post_id=data.get('post_id', ''),
            query_evaluations=qe_list,
        )

    def get_all_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the high-match notes of every query, in query order."""
        return [
            note
            for qe in self.query_evaluations
            for note in qe.get_high_match_notes()
        ]