#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Data models for deconstruction analysis."""
import json
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional
from datetime import datetime


@dataclass
class DeconstructedFeature:
    """A single deconstructed feature."""
    feature_name: str       # Feature name
    dimension: str          # Dimension (inspiration point - brand-new content / inspiration point - common differences / inspiration point - common content / purpose point / key point)
    dimension_detail: str   # Dimension sub-type (substance / form / intent, etc.)
    weight: float           # Weight
    source_index: int       # Index within that dimension
    source_info: Dict[str, Any] = field(default_factory=dict)  # Provenance info


@dataclass
class DeconstructionResult:
    """Deconstruction result for a single note."""
    note_id: str                    # Note ID
    search_word: str                # Search word
    original_feature: str           # Original feature
    source_word: str                # Source word
    evaluation_score: float         # Evaluation score
    evaluation_type: str            # Match type
    evaluation_confidence: str      # Confidence level
    key_matching_points: List[str]  # Key matching points

    # Deconstructed features
    inspiration_features: List[DeconstructedFeature] = field(default_factory=list)  # Inspiration-point features
    purpose_features: List[DeconstructedFeature] = field(default_factory=list)      # Purpose-point features
    key_point_features: List[DeconstructedFeature] = field(default_factory=list)    # Key-point features

    # Note data
    note_data: Dict[str, Any] = field(default_factory=dict)  # Note info (title, author, link)

    # API exchange
    api_request: Dict[str, Any] = field(default_factory=dict)   # API request
    api_response: Dict[str, Any] = field(default_factory=dict)  # API response

    # Metadata
    processed_at: str = ""           # Processing timestamp
    processing_time_ms: float = 0.0  # Processing time (milliseconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dict."""
        return asdict(self)

    @property
    def all_features(self) -> List[DeconstructedFeature]:
        """All features across the three dimensions."""
        return (
            self.inspiration_features
            + self.purpose_features
            + self.key_point_features
        )

    @property
    def feature_count(self) -> int:
        """Total number of features."""
        return len(self.all_features)


@dataclass
class PostDeconstruction:
    """Collection of deconstruction results for one post."""
    post_id: str                                         # Post ID
    deconstruction_results: List[DeconstructionResult]   # List of deconstruction results

    # Metadata
    total_matched_notes: int = 0   # Total matched notes
    processed_notes: int = 0       # Notes processed
    skipped_notes: int = 0         # Notes skipped
    success_count: int = 0         # Successes
    failed_count: int = 0          # Failures

    # Configuration
    api_url: str = ""                             # API URL
    min_score_threshold: float = 0.0              # Minimum score threshold
    sort_by: str = "score"                        # Sort order
    target_features: Optional[List[str]] = None   # Target feature list

    # Timing
    created_at: str = ""                   # Creation time
    processing_time_seconds: float = 0.0   # Processing time (seconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dict."""
        return {
            'metadata': {
                'stage': 'deconstruction',
                'description': '完全匹配帖子的深度解构分析',
                'post_id': self.post_id,
                'target_features': self.target_features if self.target_features else '全部',
                'total_matched_notes': self.total_matched_notes,
                'processed_notes': self.processed_notes,
                'skipped_notes': self.skipped_notes,
                'success_count': self.success_count,
                'failed_count': self.failed_count,
                'api_url': self.api_url,
                'min_score_threshold': self.min_score_threshold,
                'sort_by': self.sort_by,
                'created_at': self.created_at or datetime.now().isoformat(),
                'processing_time_seconds': round(self.processing_time_seconds, 2)
            },
            'results': [r.to_dict() for r in self.deconstruction_results]
        }

    @classmethod
    def from_json_file(cls, file_path: str) -> 'PostDeconstruction':
        """Load from a JSON file produced by to_dict()."""
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        metadata = data['metadata']
        results_data = data['results']

        # Rebuild DeconstructionResult objects
        results = []
        for r in results_data:
            # Rebuild feature lists
            inspiration_features = [
                DeconstructedFeature(**f) for f in r.get('inspiration_features', [])
            ]
            purpose_features = [
                DeconstructedFeature(**f) for f in r.get('purpose_features', [])
            ]
            key_point_features = [
                DeconstructedFeature(**f) for f in r.get('key_point_features', [])
            ]

            result = DeconstructionResult(
                note_id=r['note_id'],
                search_word=r['search_word'],
                original_feature=r['original_feature'],
                source_word=r['source_word'],
                evaluation_score=r['evaluation_score'],
                evaluation_type=r['evaluation_type'],
                evaluation_confidence=r['evaluation_confidence'],
                key_matching_points=r['key_matching_points'],
                inspiration_features=inspiration_features,
                purpose_features=purpose_features,
                key_point_features=key_point_features,
                note_data=r['note_data'],
                api_request=r['api_request'],
                api_response=r['api_response'],
                processed_at=r['processed_at'],
                processing_time_ms=r['processing_time_ms']
            )
            results.append(result)

        # to_dict() writes the sentinel '全部' ("all") when target_features is None,
        # so map anything that is not a list back to None on load.
        target_features = metadata.get('target_features')
        if not isinstance(target_features, list):
            target_features = None

        return cls(
            post_id=metadata['post_id'],
            deconstruction_results=results,
            total_matched_notes=metadata['total_matched_notes'],
            processed_notes=metadata['processed_notes'],
            skipped_notes=metadata['skipped_notes'],
            success_count=metadata['success_count'],
            failed_count=metadata['failed_count'],
            api_url=metadata['api_url'],
            min_score_threshold=metadata['min_score_threshold'],
            sort_by=metadata['sort_by'],
            target_features=target_features,
            created_at=metadata['created_at'],
            processing_time_seconds=metadata['processing_time_seconds']
        )

    def get_statistics(self) -> Dict[str, Any]:
        """Summary statistics over the deconstruction results."""
        total_features = sum(r.feature_count for r in self.deconstruction_results)
        return {
            'total_notes': len(self.deconstruction_results),
            'total_features': total_features,
            'avg_features_per_note': round(total_features / len(self.deconstruction_results), 1)
            if self.deconstruction_results else 0,
            'success_rate': round(self.success_count / self.processed_notes, 3)
            if self.processed_notes > 0 else 0
        }
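

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The field values below are
# hypothetical placeholders rather than data from the real pipeline; they just
# show how the dataclasses compose and how to_dict() / get_statistics() are
# typically called.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    feature = DeconstructedFeature(
        feature_name="example feature",
        dimension="灵感点-全新内容",
        dimension_detail="实质",
        weight=0.8,
        source_index=0,
    )
    result = DeconstructionResult(
        note_id="demo_note",
        search_word="demo",
        original_feature="demo feature",
        source_word="demo",
        evaluation_score=0.92,
        evaluation_type="exact_match",
        evaluation_confidence="high",
        key_matching_points=["matching point A"],
        inspiration_features=[feature],
        processed_at=datetime.now().isoformat(),
    )
    deconstruction = PostDeconstruction(
        post_id="demo_post",
        deconstruction_results=[result],
        total_matched_notes=1,
        processed_notes=1,
        success_count=1,
        created_at=datetime.now().isoformat(),
    )

    # Serialize to the nested dict form and report summary statistics.
    serialized = deconstruction.to_dict()
    print(serialized['metadata']['post_id'])
    print(deconstruction.get_statistics())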