liulidong
/
knowledge_search


			
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
评估结果数据模型
"""

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional


@dataclass
class NoteEvaluation:
    """Evaluation result for a single note.

    Carries the outcome of the two evaluation layers: layer 1 (query
    relevance) and layer 2 (feature match), plus optional score details.
    """

    # Score at or above which a note counts as a high match regardless of
    # its ``match_level`` label. Deliberately unannotated so that it stays a
    # plain class attribute and does not become a dataclass field.
    HIGH_MATCH_THRESHOLD = 0.8

    note_id: str                        # note ID
    channel_content_id: str             # channel content ID
    title: str = ""                     # title
    body_text: str = ""                 # body text

    # Layer 1: query relevance
    query_relevance: str = ""           # "相关" (relevant) or "不相关" (not relevant)
    query_relevance_reason: str = ""    # reason for the relevance verdict

    # Layer 2: feature match
    match_level: str = ""               # 完全匹配 / 相似匹配 / 弱相似 / 无匹配
    match_score: float = 0.0            # overall score
    match_reason: str = ""              # reason for the match verdict

    # Detailed scores
    score_details: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        ``score_details`` is returned by reference, not copied.
        """
        return {
            "note_id": self.note_id,
            "channel_content_id": self.channel_content_id,
            "title": self.title,
            "body_text": self.body_text,
            "query_relevance": self.query_relevance,
            "query_relevance_reason": self.query_relevance_reason,
            "match_level": self.match_level,
            "match_score": self.match_score,
            "match_reason": self.match_reason,
            "score_details": self.score_details,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'NoteEvaluation':
        """Build an instance from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. JSON ``null``)
        fall back to the field default, so a null ``match_score`` cannot
        later break the numeric comparison in :meth:`is_high_match`.

        Args:
            data: Mapping using the same keys that :meth:`to_dict` emits.

        Returns:
            A new instance of ``cls`` (subclasses get their own type back).
        """
        def pick(key: str, default: Any) -> Any:
            value = data.get(key)
            return default if value is None else value

        return cls(
            note_id=pick('note_id', ''),
            channel_content_id=pick('channel_content_id', ''),
            title=pick('title', ''),
            body_text=pick('body_text', ''),
            query_relevance=pick('query_relevance', ''),
            query_relevance_reason=pick('query_relevance_reason', ''),
            match_level=pick('match_level', ''),
            match_score=pick('match_score', 0.0),
            match_reason=pick('match_reason', ''),
            # None is the legitimate default here, so no fallback is needed.
            score_details=data.get('score_details'),
        )

    def is_relevant_to_query(self) -> bool:
        """Return True when layer 1 judged the note relevant to the query."""
        return self.query_relevance == "相关"

    def is_high_match(self) -> bool:
        """Return True when the note is a high match.

        A note is a high match when its level is "完全匹配" (exact match)
        or its score reaches ``HIGH_MATCH_THRESHOLD``.
        """
        if self.match_level == "完全匹配":
            return True
        return self.match_score >= self.HIGH_MATCH_THRESHOLD


@dataclass
class QueryEvaluation:
    """Evaluation result for one query, aggregating its per-note results."""

    query_text: str                     # query text
    topic_point_name: str               # topic point this query belongs to

    # Statistics
    total_notes: int = 0                # total number of notes
    filtered_count: int = 0             # notes filtered out by layer 1
    evaluated_count: int = 0            # notes evaluated by layer 2

    # Distribution of match levels
    match_distribution: Dict[str, int] = field(default_factory=dict)

    # Detailed per-note evaluation results
    notes_evaluation: List[NoteEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        Each note evaluation is serialised through its own ``to_dict``.
        """
        serialised_notes = [note.to_dict() for note in self.notes_evaluation]
        return {
            "query_text": self.query_text,
            "topic_point_name": self.topic_point_name,
            "total_notes": self.total_notes,
            "filtered_count": self.filtered_count,
            "evaluated_count": self.evaluated_count,
            "match_distribution": self.match_distribution,
            "notes_evaluation": serialised_notes,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'QueryEvaluation':
        """Build an instance from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. JSON ``null``)
        fall back to the field default; in particular a null
        ``notes_evaluation`` is treated as an empty list instead of raising
        ``TypeError`` during iteration.

        Args:
            data: Mapping using the same keys that :meth:`to_dict` emits.

        Returns:
            A new instance of ``cls``.
        """
        def pick(key: str, default: Any) -> Any:
            value = data.get(key)
            return default if value is None else value

        notes = [
            NoteEvaluation.from_dict(raw_note)
            for raw_note in pick('notes_evaluation', [])
        ]

        return cls(
            query_text=pick('query_text', ''),
            topic_point_name=pick('topic_point_name', ''),
            total_notes=pick('total_notes', 0),
            filtered_count=pick('filtered_count', 0),
            evaluated_count=pick('evaluated_count', 0),
            # Copy so later mutation of the instance does not leak back into
            # the caller's input mapping (and vice versa).
            match_distribution=dict(pick('match_distribution', {})),
            notes_evaluation=notes,
        )

    def get_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the notes whose ``is_high_match()`` is true."""
        return [note for note in self.notes_evaluation if note.is_high_match()]

    def get_relevant_notes(self) -> List[NoteEvaluation]:
        """Return the notes whose ``is_relevant_to_query()`` is true."""
        return [
            note for note in self.notes_evaluation
            if note.is_relevant_to_query()
        ]


@dataclass
class PostEvaluation:
    """All query evaluation results collected for one post."""

    post_id: str                        # post ID
    query_evaluations: List[QueryEvaluation] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the evaluation as a plain dictionary.

        Besides the serialised query evaluations, the result carries a
        derived ``statistics`` section: the number of queries, the sum of
        their ``evaluated_count`` values, and the number of high-match
        notes across all queries.
        """
        queries = self.query_evaluations
        statistics = {
            "total_queries": len(queries),
            "total_notes_evaluated": sum(qe.evaluated_count for qe in queries),
            "total_high_match_notes": sum(
                len(qe.get_high_match_notes()) for qe in queries
            ),
        }

        return {
            "post_id": self.post_id,
            "query_evaluations": [qe.to_dict() for qe in queries],
            "statistics": statistics,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PostEvaluation':
        """Build an instance from a dictionary.

        A missing or explicitly ``None`` ``query_evaluations`` is treated as
        an empty list, and a missing or ``None`` ``post_id`` as ``''``. The
        ``statistics`` key emitted by :meth:`to_dict` is ignored because it
        is recomputed on serialisation.

        Args:
            data: Mapping using the same keys that :meth:`to_dict` emits.

        Returns:
            A new instance of ``cls``.
        """
        raw_queries = data.get('query_evaluations')
        if raw_queries is None:
            raw_queries = []

        post_id = data.get('post_id')
        if post_id is None:
            post_id = ''

        return cls(
            post_id=post_id,
            query_evaluations=[
                QueryEvaluation.from_dict(raw) for raw in raw_queries
            ],
        )

    def get_all_high_match_notes(self) -> List[NoteEvaluation]:
        """Return the high-match notes of every query, in query order."""
        return [
            note
            for qe in self.query_evaluations
            for note in qe.get_high_match_notes()
        ]