#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data models for deconstruction analysis results.
"""
import json
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional
from datetime import datetime

@dataclass
class DeconstructedFeature:
    """A single deconstructed feature."""
    feature_name: str      # Feature name
    dimension: str         # Dimension (inspiration-new content / inspiration-shared differences / inspiration-shared content / purpose point / key point)
    dimension_detail: str  # Dimension sub-type (substance / form / intent, etc.)
    weight: float          # Weight
    source_index: int      # Index within this dimension
    source_info: Dict[str, Any] = field(default_factory=dict)  # Provenance information

@dataclass
class DeconstructionResult:
    """Deconstruction result for a single note."""
    note_id: str                    # Note ID
    search_word: str                # Search word
    original_feature: str           # Original feature
    source_word: str                # Source word
    evaluation_score: float         # Evaluation score
    evaluation_type: str            # Match type
    evaluation_confidence: str      # Confidence level
    key_matching_points: List[str]  # Key matching points

    # Deconstructed features
    inspiration_features: List[DeconstructedFeature] = field(default_factory=list)  # Inspiration-point features
    purpose_features: List[DeconstructedFeature] = field(default_factory=list)      # Purpose-point features
    key_point_features: List[DeconstructedFeature] = field(default_factory=list)    # Key-point features

    # Note data
    note_data: Dict[str, Any] = field(default_factory=dict)  # Note info (title, author, link)

    # API exchange
    api_request: Dict[str, Any] = field(default_factory=dict)   # API request
    api_response: Dict[str, Any] = field(default_factory=dict)  # API response

    # Metadata
    processed_at: str = ""           # Processing timestamp
    processing_time_ms: float = 0.0  # Processing time (milliseconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return asdict(self)

    @property
    def all_features(self) -> List[DeconstructedFeature]:
        """All features across the three dimensions."""
        return (
            self.inspiration_features +
            self.purpose_features +
            self.key_point_features
        )

    @property
    def feature_count(self) -> int:
        """Total number of features."""
        return len(self.all_features)

@dataclass
class PostDeconstruction:
    """Collection of deconstruction results for a single post."""
    post_id: str                                        # Post ID
    deconstruction_results: List[DeconstructionResult]  # List of deconstruction results

    # Metadata
    total_matched_notes: int = 0  # Total number of matched notes
    processed_notes: int = 0      # Notes processed
    skipped_notes: int = 0        # Notes skipped
    success_count: int = 0        # Successes
    failed_count: int = 0         # Failures

    # Configuration
    api_url: str = ""                            # API endpoint
    min_score_threshold: float = 0.0             # Minimum score threshold
    sort_by: str = "score"                       # Sort key
    target_features: Optional[List[str]] = None  # Target feature list

    # Timing
    created_at: str = ""                  # Creation timestamp
    processing_time_seconds: float = 0.0  # Processing time (seconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return {
            'metadata': {
                'stage': 'deconstruction',
                'description': 'In-depth deconstruction analysis of fully matched notes',
                'post_id': self.post_id,
                'target_features': self.target_features if self.target_features else 'all',
                'total_matched_notes': self.total_matched_notes,
                'processed_notes': self.processed_notes,
                'skipped_notes': self.skipped_notes,
                'success_count': self.success_count,
                'failed_count': self.failed_count,
                'api_url': self.api_url,
                'min_score_threshold': self.min_score_threshold,
                'sort_by': self.sort_by,
                'created_at': self.created_at or datetime.now().isoformat(),
                'processing_time_seconds': round(self.processing_time_seconds, 2)
            },
            'results': [r.to_dict() for r in self.deconstruction_results]
        }

    @classmethod
    def from_json_file(cls, file_path: str) -> 'PostDeconstruction':
        """Load from a JSON file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        metadata = data['metadata']
        results_data = data['results']

        # Rebuild the DeconstructionResult objects
        results = []
        for r in results_data:
            # Rebuild the feature lists
            inspiration_features = [
                DeconstructedFeature(**f) for f in r.get('inspiration_features', [])
            ]
            purpose_features = [
                DeconstructedFeature(**f) for f in r.get('purpose_features', [])
            ]
            key_point_features = [
                DeconstructedFeature(**f) for f in r.get('key_point_features', [])
            ]

            result = DeconstructionResult(
                note_id=r['note_id'],
                search_word=r['search_word'],
                original_feature=r['original_feature'],
                source_word=r['source_word'],
                evaluation_score=r['evaluation_score'],
                evaluation_type=r['evaluation_type'],
                evaluation_confidence=r['evaluation_confidence'],
                key_matching_points=r['key_matching_points'],
                inspiration_features=inspiration_features,
                purpose_features=purpose_features,
                key_point_features=key_point_features,
                note_data=r['note_data'],
                api_request=r['api_request'],
                api_response=r['api_response'],
                processed_at=r['processed_at'],
                processing_time_ms=r['processing_time_ms']
            )
            results.append(result)

        # to_dict() serializes target_features as the string 'all' when no
        # explicit list was set; restore that sentinel to None on load.
        target_features = metadata.get('target_features')
        if not isinstance(target_features, list):
            target_features = None

        return cls(
            post_id=metadata['post_id'],
            deconstruction_results=results,
            total_matched_notes=metadata['total_matched_notes'],
            processed_notes=metadata['processed_notes'],
            skipped_notes=metadata['skipped_notes'],
            success_count=metadata['success_count'],
            failed_count=metadata['failed_count'],
            api_url=metadata['api_url'],
            min_score_threshold=metadata['min_score_threshold'],
            sort_by=metadata['sort_by'],
            target_features=target_features,
            created_at=metadata['created_at'],
            processing_time_seconds=metadata['processing_time_seconds']
        )

    def get_statistics(self) -> Dict[str, Any]:
        """Return summary statistics."""
        total_features = sum(r.feature_count for r in self.deconstruction_results)
        return {
            'total_notes': len(self.deconstruction_results),
            'total_features': total_features,
            'avg_features_per_note': round(total_features / len(self.deconstruction_results), 1) if self.deconstruction_results else 0,
            'success_rate': round(self.success_count / self.processed_notes, 3) if self.processed_notes > 0 else 0
        }
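

# ---------------------------------------------------------------------------
# Minimal usage sketch. The note/post values below are hypothetical and only
# illustrate how the dataclasses above fit together; they are not real data.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Build one feature, attach it to a result, and collect it for a post.
    feature = DeconstructedFeature(
        feature_name="example feature",       # hypothetical
        dimension="inspiration-new content",  # hypothetical dimension label
        dimension_detail="substance",
        weight=0.8,
        source_index=0,
    )
    result = DeconstructionResult(
        note_id="note-001",                   # hypothetical IDs and scores
        search_word="example",
        original_feature="example feature",
        source_word="example",
        evaluation_score=0.92,
        evaluation_type="full_match",
        evaluation_confidence="high",
        key_matching_points=["point A"],
        inspiration_features=[feature],
    )
    collection = PostDeconstruction(
        post_id="post-001",
        deconstruction_results=[result],
        processed_notes=1,
        success_count=1,
    )
    # Serialize and summarize.
    print(collection.to_dict()['metadata']['post_id'])
    print(collection.get_statistics())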