#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data models for evaluation results.
"""
- from dataclasses import dataclass, field
- from typing import List, Dict, Any, Optional
@dataclass
class NoteEvaluation:
    """Evaluation result for a single note.

    Holds the note's identifying fields and text together with the outcome
    of two evaluation layers: relevance to the query (layer 1) and feature
    match (layer 2).
    """

    # Label stored in ``query_relevance`` when the note is judged relevant.
    # Left unannotated on purpose so the dataclass does not treat it as a field.
    _RELEVANT_LABEL = "相关"
    # Label stored in ``match_level`` for a full match.
    _FULL_MATCH_LABEL = "完全匹配"
    # Minimum ``match_score`` that qualifies as a high match on its own.
    _HIGH_MATCH_MIN_SCORE = 0.8

    note_id: str  # Note ID
    channel_content_id: str  # Channel content ID
    title: str = ""  # Title
    body_text: str = ""  # Body text

    # Layer 1: relevance to the query
    query_relevance: str = ""  # "相关" (relevant) or "不相关" (not relevant)
    query_relevance_reason: str = ""  # Rationale for the relevance verdict

    # Layer 2: feature match
    match_level: str = ""  # One of 完全匹配 / 相似匹配 / 弱相似 / 无匹配
    match_score: float = 0.0  # Overall score
    match_reason: str = ""  # Rationale for the match verdict

    # Detailed score breakdown
    score_details: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary keyed by field name."""
        return {
            "note_id": self.note_id,
            "channel_content_id": self.channel_content_id,
            "title": self.title,
            "body_text": self.body_text,
            "query_relevance": self.query_relevance,
            "query_relevance_reason": self.query_relevance_reason,
            "match_level": self.match_level,
            "match_score": self.match_score,
            "match_reason": self.match_reason,
            "score_details": self.score_details,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'NoteEvaluation':
        """Build an instance from a dictionary such as ``to_dict`` produces.

        Missing keys fall back to the field defaults (empty string for the
        two ID fields). An explicit ``None`` for ``match_score`` is treated
        as ``0.0`` so that ``is_high_match`` can always compare the score.

        Args:
            data: Mapping of field names to values.

        Returns:
            A new instance of the class the method is called on.
        """
        score = data.get('match_score')
        return cls(
            note_id=data.get('note_id', ''),
            channel_content_id=data.get('channel_content_id', ''),
            title=data.get('title', ''),
            body_text=data.get('body_text', ''),
            query_relevance=data.get('query_relevance', ''),
            query_relevance_reason=data.get('query_relevance_reason', ''),
            match_level=data.get('match_level', ''),
            # A null score would make the >= comparison raise TypeError later.
            match_score=0.0 if score is None else score,
            match_reason=data.get('match_reason', ''),
            score_details=data.get('score_details'),
        )

    def is_relevant_to_query(self) -> bool:
        """Return True when ``query_relevance`` is the "relevant" label."""
        return self.query_relevance == self._RELEVANT_LABEL

    def is_high_match(self) -> bool:
        """Return True for a high match.

        A note is a high match when its ``match_level`` is the full-match
        label or its ``match_score`` is at least 0.8.
        """
        if self.match_level == self._FULL_MATCH_LABEL:
            return True
        return self.match_score >= self._HIGH_MATCH_MIN_SCORE
@dataclass
class QueryEvaluation:
    """Evaluation result for one query."""

    query_text: str  # Query text
    topic_point_name: str  # Topic point the query belongs to

    # Statistics
    total_notes: int = 0  # Total number of notes
    filtered_count: int = 0  # Number of notes filtered out by layer 1
    evaluated_count: int = 0  # Number of notes evaluated by layer 2

    # Match-level distribution
    match_distribution: Dict[str, int] = field(default_factory=dict)

    # Per-note evaluation results
    notes_evaluation: List[NoteEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        Each entry of ``notes_evaluation`` is converted with its own
        ``to_dict``.
        """
        return {
            "query_text": self.query_text,
            "topic_point_name": self.topic_point_name,
            "total_notes": self.total_notes,
            "filtered_count": self.filtered_count,
            "evaluated_count": self.evaluated_count,
            "match_distribution": self.match_distribution,
            "notes_evaluation": [note.to_dict() for note in self.notes_evaluation],
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'QueryEvaluation':
        """Build an instance from a dictionary such as ``to_dict`` produces.

        Missing keys fall back to the field defaults. For the counters,
        ``match_distribution`` and ``notes_evaluation`` an explicit ``None``
        is treated the same as a missing key.

        Args:
            data: Mapping of field names to values.

        Returns:
            A new instance of the class the method is called on.
        """
        notes = [
            NoteEvaluation.from_dict(item)
            # ``or []`` keeps a null list from raising while iterating.
            for item in (data.get('notes_evaluation') or [])
        ]
        return cls(
            query_text=data.get('query_text', ''),
            topic_point_name=data.get('topic_point_name', ''),
            # Null counters would otherwise break later arithmetic on them.
            total_notes=data.get('total_notes') or 0,
            filtered_count=data.get('filtered_count') or 0,
            evaluated_count=data.get('evaluated_count') or 0,
            # Copy so the new object does not share the caller's dict.
            match_distribution=dict(data.get('match_distribution') or {}),
            notes_evaluation=notes,
        )

    def get_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the notes whose ``is_high_match()`` is true."""
        return [note for note in self.notes_evaluation if note.is_high_match()]

    def get_relevant_notes(self) -> List[NoteEvaluation]:
        """Return the notes whose ``is_relevant_to_query()`` is true."""
        return [
            note for note in self.notes_evaluation
            if note.is_relevant_to_query()
        ]
@dataclass
class PostEvaluation:
    """All evaluation results for one post."""

    post_id: str  # Post ID
    query_evaluations: List[QueryEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the post's evaluations as a plain dictionary.

        Besides the serialized query evaluations, the result carries a
        ``statistics`` entry with the number of queries, the sum of their
        ``evaluated_count`` values, and the number of high-match notes.
        """
        return {
            "post_id": self.post_id,
            "query_evaluations": [qe.to_dict() for qe in self.query_evaluations],
            "statistics": {
                "total_queries": len(self.query_evaluations),
                "total_notes_evaluated": sum(
                    qe.evaluated_count for qe in self.query_evaluations
                ),
                "total_high_match_notes": len(self.get_all_high_match_notes()),
            },
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PostEvaluation':
        """Build an instance from a dictionary such as ``to_dict`` produces.

        Only ``post_id`` and ``query_evaluations`` are read; a missing or
        ``None`` ``query_evaluations`` yields an empty list.

        Args:
            data: Mapping of field names to values.

        Returns:
            A new instance of the class the method is called on.
        """
        return cls(
            post_id=data.get('post_id', ''),
            query_evaluations=[
                QueryEvaluation.from_dict(item)
                # ``or []`` keeps a null list from raising while iterating.
                for item in (data.get('query_evaluations') or [])
            ],
        )

    def get_all_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the high-match notes of every query, in query order."""
        return [
            note
            for qe in self.query_evaluations
            for note in qe.get_high_match_notes()
        ]
|