#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data models for deconstruction analysis.
"""
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional
from datetime import datetime


@dataclass
class DeconstructedFeature:
    """A single deconstructed feature."""
    feature_name: str      # feature name
    dimension: str         # dimension (inspiration point - brand-new content / inspiration point - common differences / inspiration point - common content / purpose point / key point)
    dimension_detail: str  # dimension detail (substance / form / intent, etc.)
    weight: float          # weight
    source_index: int      # index within its dimension
    source_info: Dict[str, Any] = field(default_factory=dict)  # provenance / source-tracing info
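

# Usage sketch (illustrative only): constructing a feature by hand. The
# dimension, weight, and source_info values below are assumed examples,
# not a vocabulary enforced by this module.
#
#     feature = DeconstructedFeature(
#         feature_name="soft lighting",
#         dimension="inspiration point - common content",
#         dimension_detail="form",
#         weight=0.7,
#         source_index=0,
#         source_info={"note_id": "abc123"},
#     )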


@dataclass
class DeconstructionResult:
    """Deconstruction result for a single note."""
    note_id: str                    # note ID
    search_word: str                # search word
    original_feature: str           # original feature
    source_word: str                # source word
    evaluation_score: float         # evaluation score
    evaluation_type: str            # match type
    evaluation_confidence: str      # confidence level
    key_matching_points: List[str]  # key matching points

    # Deconstructed features
    inspiration_features: List[DeconstructedFeature] = field(default_factory=list)  # inspiration-point features
    purpose_features: List[DeconstructedFeature] = field(default_factory=list)      # purpose-point features
    key_point_features: List[DeconstructedFeature] = field(default_factory=list)    # key-point features

    # Note data
    note_data: Dict[str, Any] = field(default_factory=dict)  # note info (title, author, link)

    # API exchange
    api_request: Dict[str, Any] = field(default_factory=dict)   # API request
    api_response: Dict[str, Any] = field(default_factory=dict)  # API response

    # Metadata
    processed_at: str = ""           # processing timestamp
    processing_time_ms: float = 0.0  # processing time (milliseconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return asdict(self)

    @property
    def all_features(self) -> List[DeconstructedFeature]:
        """All features across the three dimensions."""
        return (
            self.inspiration_features +
            self.purpose_features +
            self.key_point_features
        )

    @property
    def feature_count(self) -> int:
        """Total number of features."""
        return len(self.all_features)
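

# Usage sketch (illustrative only): aggregating features on a populated result.
# `result` is assumed to be a DeconstructionResult already built by the pipeline.
#
#     for feat in result.all_features:    # inspiration + purpose + key-point lists
#         print(feat.dimension, feat.feature_name, feat.weight)
#     print(result.feature_count)         # total across the three lists
#     payload = result.to_dict()          # plain dict, ready for json.dump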


@dataclass
class PostDeconstruction:
    """Collection of deconstruction results for a single post."""
    post_id: str                                        # post ID
    deconstruction_results: List[DeconstructionResult]  # list of deconstruction results

    # Metadata
    total_matched_notes: int = 0  # total number of matched notes
    processed_notes: int = 0      # number of notes processed
    skipped_notes: int = 0        # number of notes skipped
    success_count: int = 0        # success count
    failed_count: int = 0         # failure count

    # Configuration
    api_url: str = ""                            # API endpoint
    min_score_threshold: float = 0.0             # minimum score threshold
    sort_by: str = "score"                       # sort order
    target_features: Optional[List[str]] = None  # target feature list

    # Timing
    created_at: str = ""                  # creation timestamp
    processing_time_seconds: float = 0.0  # processing time (seconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return {
            'metadata': {
                'stage': 'deconstruction',
                'description': 'In-depth deconstruction analysis of fully matched notes',
                'post_id': self.post_id,
                'target_features': self.target_features if self.target_features else 'all',
                'total_matched_notes': self.total_matched_notes,
                'processed_notes': self.processed_notes,
                'skipped_notes': self.skipped_notes,
                'success_count': self.success_count,
                'failed_count': self.failed_count,
                'api_url': self.api_url,
                'min_score_threshold': self.min_score_threshold,
                'sort_by': self.sort_by,
                'created_at': self.created_at or datetime.now().isoformat(),
                'processing_time_seconds': round(self.processing_time_seconds, 2)
            },
            'results': [r.to_dict() for r in self.deconstruction_results]
        }
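
    # Usage sketch (illustrative only): persisting the collection. The output
    # path is an assumed example, not a path used elsewhere in the pipeline.
    #
    #     with open("deconstruction_result.json", "w", encoding="utf-8") as f:
    #         json.dump(post.to_dict(), f, ensure_ascii=False, indent=2)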

    @classmethod
    def from_json_file(cls, file_path: str) -> 'PostDeconstruction':
        """Load from a JSON file produced by to_dict()."""
        import json
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        metadata = data['metadata']
        results_data = data['results']

        # Rebuild DeconstructionResult objects
        results = []
        for r in results_data:
            # Rebuild the feature lists
            inspiration_features = [
                DeconstructedFeature(**f) for f in r.get('inspiration_features', [])
            ]
            purpose_features = [
                DeconstructedFeature(**f) for f in r.get('purpose_features', [])
            ]
            key_point_features = [
                DeconstructedFeature(**f) for f in r.get('key_point_features', [])
            ]
            result = DeconstructionResult(
                note_id=r['note_id'],
                search_word=r['search_word'],
                original_feature=r['original_feature'],
                source_word=r['source_word'],
                evaluation_score=r['evaluation_score'],
                evaluation_type=r['evaluation_type'],
                evaluation_confidence=r['evaluation_confidence'],
                key_matching_points=r['key_matching_points'],
                inspiration_features=inspiration_features,
                purpose_features=purpose_features,
                key_point_features=key_point_features,
                note_data=r['note_data'],
                api_request=r['api_request'],
                api_response=r['api_response'],
                processed_at=r['processed_at'],
                processing_time_ms=r['processing_time_ms']
            )
            results.append(result)

        # to_dict() stores the string 'all' when no target features were set,
        # so only keep the value if it round-trips as a list.
        target_features = metadata.get('target_features')
        if not isinstance(target_features, list):
            target_features = None

        return cls(
            post_id=metadata['post_id'],
            deconstruction_results=results,
            total_matched_notes=metadata['total_matched_notes'],
            processed_notes=metadata['processed_notes'],
            skipped_notes=metadata['skipped_notes'],
            success_count=metadata['success_count'],
            failed_count=metadata['failed_count'],
            api_url=metadata['api_url'],
            min_score_threshold=metadata['min_score_threshold'],
            sort_by=metadata['sort_by'],
            target_features=target_features,
            created_at=metadata['created_at'],
            processing_time_seconds=metadata['processing_time_seconds']
        )
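
    # Usage sketch (illustrative only): reloading a previously saved run. The
    # file name is an assumed example.
    #
    #     post = PostDeconstruction.from_json_file("deconstruction_result.json")
    #     print(post.post_id, len(post.deconstruction_results))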

    def get_statistics(self) -> Dict[str, Any]:
        """Compute summary statistics."""
        total_features = sum(r.feature_count for r in self.deconstruction_results)
        return {
            'total_notes': len(self.deconstruction_results),
            'total_features': total_features,
            'avg_features_per_note': round(total_features / len(self.deconstruction_results), 1) if self.deconstruction_results else 0,
            'success_rate': round(self.success_count / self.processed_notes, 3) if self.processed_notes > 0 else 0
        }
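

# ---------------------------------------------------------------------------
# Illustrative smoke test: a minimal sketch, not part of the real pipeline.
# All values below are made up (note IDs, scores, match types). It exercises
# the to_dict() / from_json_file() round trip and get_statistics().
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import json
    import os
    import tempfile

    feature = DeconstructedFeature(
        feature_name="example feature",
        dimension="purpose point",
        dimension_detail="intent",
        weight=1.0,
        source_index=0,
    )
    result = DeconstructionResult(
        note_id="note_001",
        search_word="example search",
        original_feature="example feature",
        source_word="example source",
        evaluation_score=0.92,
        evaluation_type="full_match",
        evaluation_confidence="high",
        key_matching_points=["point A"],
        purpose_features=[feature],
        processed_at=datetime.now().isoformat(),
    )
    collection = PostDeconstruction(
        post_id="post_001",
        deconstruction_results=[result],
        total_matched_notes=1,
        processed_notes=1,
        success_count=1,
    )

    # Round trip through a temporary JSON file.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as tmp:
        json.dump(collection.to_dict(), tmp, ensure_ascii=False, indent=2)
        tmp_path = tmp.name
    try:
        loaded = PostDeconstruction.from_json_file(tmp_path)
        print(loaded.get_statistics())
    finally:
        os.remove(tmp_path)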