evaluation.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 评估结果数据模型
  5. """
  6. from dataclasses import dataclass, field
  7. from typing import List, Dict, Any, Optional
  8. @dataclass
  9. class NoteEvaluation:
  10. """单个帖子的评估结果"""
  11. note_id: str # 帖子ID
  12. channel_content_id: str # 频道内容ID
  13. title: str = "" # 标题
  14. body_text: str = "" # 正文
  15. # 第一层:Query相关性
  16. query_relevance: str = "" # "相关" or "不相关"
  17. query_relevance_reason: str = "" # 判断理由
  18. # 第二层:特征匹配度
  19. match_level: str = "" # 完全匹配/相似匹配/弱相似/无匹配
  20. match_score: float = 0.0 # 综合得分
  21. match_reason: str = "" # 匹配理由
  22. # 详细得分
  23. score_details: Optional[Dict[str, Any]] = None
  24. def to_dict(self) -> Dict[str, Any]:
  25. """转为字典"""
  26. return {
  27. "note_id": self.note_id,
  28. "channel_content_id": self.channel_content_id,
  29. "title": self.title,
  30. "body_text": self.body_text,
  31. "query_relevance": self.query_relevance,
  32. "query_relevance_reason": self.query_relevance_reason,
  33. "match_level": self.match_level,
  34. "match_score": self.match_score,
  35. "match_reason": self.match_reason,
  36. "score_details": self.score_details
  37. }
  38. @staticmethod
  39. def from_dict(data: Dict[str, Any]) -> 'NoteEvaluation':
  40. """从字典创建"""
  41. return NoteEvaluation(
  42. note_id=data.get('note_id', ''),
  43. channel_content_id=data.get('channel_content_id', ''),
  44. title=data.get('title', ''),
  45. body_text=data.get('body_text', ''),
  46. query_relevance=data.get('query_relevance', ''),
  47. query_relevance_reason=data.get('query_relevance_reason', ''),
  48. match_level=data.get('match_level', ''),
  49. match_score=data.get('match_score', 0.0),
  50. match_reason=data.get('match_reason', ''),
  51. score_details=data.get('score_details')
  52. )
  53. def is_relevant_to_query(self) -> bool:
  54. """是否与Query相关"""
  55. return self.query_relevance == "相关"
  56. def is_high_match(self) -> bool:
  57. """是否高匹配(完全匹配)"""
  58. return self.match_level == "完全匹配" or self.match_score >= 0.8
  59. @dataclass
  60. class QueryEvaluation:
  61. """Query的评估结果"""
  62. query_text: str # Query文本
  63. topic_point_name: str # 所属选题点
  64. # 统计信息
  65. total_notes: int = 0 # 总帖子数
  66. filtered_count: int = 0 # 第一层过滤掉的数量
  67. evaluated_count: int = 0 # 第二层评估的数量
  68. # 匹配度分布
  69. match_distribution: Dict[str, int] = field(default_factory=dict)
  70. # 详细评估结果
  71. notes_evaluation: List[NoteEvaluation] = field(default_factory=list)
  72. def to_dict(self) -> Dict[str, Any]:
  73. """转为字典"""
  74. return {
  75. "query_text": self.query_text,
  76. "topic_point_name": self.topic_point_name,
  77. "total_notes": self.total_notes,
  78. "filtered_count": self.filtered_count,
  79. "evaluated_count": self.evaluated_count,
  80. "match_distribution": self.match_distribution,
  81. "notes_evaluation": [ne.to_dict() for ne in self.notes_evaluation]
  82. }
  83. @staticmethod
  84. def from_dict(data: Dict[str, Any]) -> 'QueryEvaluation':
  85. """从字典创建"""
  86. notes_eval = [
  87. NoteEvaluation.from_dict(ne)
  88. for ne in data.get('notes_evaluation', [])
  89. ]
  90. return QueryEvaluation(
  91. query_text=data.get('query_text', ''),
  92. topic_point_name=data.get('topic_point_name', ''),
  93. total_notes=data.get('total_notes', 0),
  94. filtered_count=data.get('filtered_count', 0),
  95. evaluated_count=data.get('evaluated_count', 0),
  96. match_distribution=data.get('match_distribution', {}),
  97. notes_evaluation=notes_eval
  98. )
  99. def get_high_match_notes(self) -> List[NoteEvaluation]:
  100. """获取高匹配的帖子"""
  101. return [ne for ne in self.notes_evaluation if ne.is_high_match()]
  102. def get_relevant_notes(self) -> List[NoteEvaluation]:
  103. """获取与Query相关的帖子"""
  104. return [ne for ne in self.notes_evaluation if ne.is_relevant_to_query()]
  105. @dataclass
  106. class PostEvaluation:
  107. """帖子的所有评估结果"""
  108. post_id: str # 帖子ID
  109. query_evaluations: List[QueryEvaluation] = field(default_factory=list)
  110. def to_dict(self) -> Dict[str, Any]:
  111. """转为字典"""
  112. total_queries = len(self.query_evaluations)
  113. total_notes_evaluated = sum(qe.evaluated_count for qe in self.query_evaluations)
  114. total_high_match = sum(
  115. len(qe.get_high_match_notes())
  116. for qe in self.query_evaluations
  117. )
  118. return {
  119. "post_id": self.post_id,
  120. "query_evaluations": [qe.to_dict() for qe in self.query_evaluations],
  121. "statistics": {
  122. "total_queries": total_queries,
  123. "total_notes_evaluated": total_notes_evaluated,
  124. "total_high_match_notes": total_high_match
  125. }
  126. }
  127. @staticmethod
  128. def from_dict(data: Dict[str, Any]) -> 'PostEvaluation':
  129. """从字典创建"""
  130. qe_list = [
  131. QueryEvaluation.from_dict(qe)
  132. for qe in data.get('query_evaluations', [])
  133. ]
  134. return PostEvaluation(
  135. post_id=data.get('post_id', ''),
  136. query_evaluations=qe_list
  137. )
  138. def get_all_high_match_notes(self) -> List[NoteEvaluation]:
  139. """获取所有高匹配的帖子"""
  140. all_notes = []
  141. for qe in self.query_evaluations:
  142. all_notes.extend(qe.get_high_match_notes())
  143. return all_notes