#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data models for deconstruction analysis.
"""
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional
from datetime import datetime


@dataclass
class DeconstructedFeature:
    """A single deconstructed feature."""
    feature_name: str      # feature name
    dimension: str         # dimension (inspiration point - brand-new content / inspiration point - common differences / inspiration point - common content / purpose point / key point)
    dimension_detail: str  # dimension detail (substance / form / intent, etc.)
    weight: float          # weight
    source_index: int      # index within its dimension
    source_info: Dict[str, Any] = field(default_factory=dict)  # provenance / source-tracing info
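

# Usage sketch (illustrative only): constructing a feature by hand. The
# dimension, weight, and source_info values below are assumed examples,
# not a vocabulary enforced by this module.
#
#     feature = DeconstructedFeature(
#         feature_name="soft lighting",
#         dimension="inspiration point - common content",
#         dimension_detail="form",
#         weight=0.7,
#         source_index=0,
#         source_info={"note_id": "abc123"},
#     )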


@dataclass
class DeconstructionResult:
    """Deconstruction result for a single note."""
    note_id: str                    # note ID
    search_word: str                # search word
    original_feature: str           # original feature
    source_word: str                # source word
    evaluation_score: float         # evaluation score
    evaluation_type: str            # match type
    evaluation_confidence: str      # confidence level
    key_matching_points: List[str]  # key matching points

    # Deconstructed features
    inspiration_features: List[DeconstructedFeature] = field(default_factory=list)  # inspiration-point features
    purpose_features: List[DeconstructedFeature] = field(default_factory=list)      # purpose-point features
    key_point_features: List[DeconstructedFeature] = field(default_factory=list)    # key-point features

    # Note data
    note_data: Dict[str, Any] = field(default_factory=dict)  # note info (title, author, link)

    # API exchange
    api_request: Dict[str, Any] = field(default_factory=dict)   # API request
    api_response: Dict[str, Any] = field(default_factory=dict)  # API response

    # Metadata
    processed_at: str = ""           # processing timestamp
    processing_time_ms: float = 0.0  # processing time (milliseconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return asdict(self)

    @property
    def all_features(self) -> List[DeconstructedFeature]:
        """All features across the three dimensions."""
        return (
            self.inspiration_features +
            self.purpose_features +
            self.key_point_features
        )

    @property
    def feature_count(self) -> int:
        """Total number of features."""
        return len(self.all_features)
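

# Usage sketch (illustrative only): aggregating features on a populated result.
# `result` is assumed to be a DeconstructionResult already built by the pipeline.
#
#     for feat in result.all_features:    # inspiration + purpose + key-point lists
#         print(feat.dimension, feat.feature_name, feat.weight)
#     print(result.feature_count)         # total across the three lists
#     payload = result.to_dict()          # plain dict, ready for json.dump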


@dataclass
class PostDeconstruction:
    """Collection of deconstruction results for a single post."""
    post_id: str                                        # post ID
    deconstruction_results: List[DeconstructionResult]  # list of deconstruction results

    # Metadata
    total_matched_notes: int = 0  # total number of matched notes
    processed_notes: int = 0      # number of notes processed
    skipped_notes: int = 0        # number of notes skipped
    success_count: int = 0        # success count
    failed_count: int = 0         # failure count

    # Configuration
    api_url: str = ""                            # API endpoint
    min_score_threshold: float = 0.0             # minimum score threshold
    sort_by: str = "score"                       # sort order
    target_features: Optional[List[str]] = None  # target feature list

    # Timing
    created_at: str = ""                  # creation timestamp
    processing_time_seconds: float = 0.0  # processing time (seconds)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary."""
        return {
            'metadata': {
                'stage': 'deconstruction',
                'description': 'In-depth deconstruction analysis of fully matched notes',
                'post_id': self.post_id,
                'target_features': self.target_features if self.target_features else 'all',
                'total_matched_notes': self.total_matched_notes,
                'processed_notes': self.processed_notes,
                'skipped_notes': self.skipped_notes,
                'success_count': self.success_count,
                'failed_count': self.failed_count,
                'api_url': self.api_url,
                'min_score_threshold': self.min_score_threshold,
                'sort_by': self.sort_by,
                'created_at': self.created_at or datetime.now().isoformat(),
                'processing_time_seconds': round(self.processing_time_seconds, 2)
            },
            'results': [r.to_dict() for r in self.deconstruction_results]
        }
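
    # Usage sketch (illustrative only): persisting the collection. The output
    # path is an assumed example, not a path used elsewhere in the pipeline.
    #
    #     with open("deconstruction_result.json", "w", encoding="utf-8") as f:
    #         json.dump(post.to_dict(), f, ensure_ascii=False, indent=2)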

    @classmethod
    def from_json_file(cls, file_path: str) -> 'PostDeconstruction':
        """Load from a JSON file produced by to_dict()."""
        import json
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        metadata = data['metadata']
        results_data = data['results']

        # Rebuild DeconstructionResult objects
        results = []
        for r in results_data:
            # Rebuild the feature lists
            inspiration_features = [
                DeconstructedFeature(**f) for f in r.get('inspiration_features', [])
            ]
            purpose_features = [
                DeconstructedFeature(**f) for f in r.get('purpose_features', [])
            ]
            key_point_features = [
                DeconstructedFeature(**f) for f in r.get('key_point_features', [])
            ]
            result = DeconstructionResult(
                note_id=r['note_id'],
                search_word=r['search_word'],
                original_feature=r['original_feature'],
                source_word=r['source_word'],
                evaluation_score=r['evaluation_score'],
                evaluation_type=r['evaluation_type'],
                evaluation_confidence=r['evaluation_confidence'],
                key_matching_points=r['key_matching_points'],
                inspiration_features=inspiration_features,
                purpose_features=purpose_features,
                key_point_features=key_point_features,
                note_data=r['note_data'],
                api_request=r['api_request'],
                api_response=r['api_response'],
                processed_at=r['processed_at'],
                processing_time_ms=r['processing_time_ms']
            )
            results.append(result)

        # to_dict() stores the string 'all' when no target features were set,
        # so only keep the value if it round-trips as a list.
        target_features = metadata.get('target_features')
        if not isinstance(target_features, list):
            target_features = None

        return cls(
            post_id=metadata['post_id'],
            deconstruction_results=results,
            total_matched_notes=metadata['total_matched_notes'],
            processed_notes=metadata['processed_notes'],
            skipped_notes=metadata['skipped_notes'],
            success_count=metadata['success_count'],
            failed_count=metadata['failed_count'],
            api_url=metadata['api_url'],
            min_score_threshold=metadata['min_score_threshold'],
            sort_by=metadata['sort_by'],
            target_features=target_features,
            created_at=metadata['created_at'],
            processing_time_seconds=metadata['processing_time_seconds']
        )
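
    # Usage sketch (illustrative only): reloading a previously saved run. The
    # file name is an assumed example.
    #
    #     post = PostDeconstruction.from_json_file("deconstruction_result.json")
    #     print(post.post_id, len(post.deconstruction_results))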

    def get_statistics(self) -> Dict[str, Any]:
        """Compute summary statistics."""
        total_features = sum(r.feature_count for r in self.deconstruction_results)
        return {
            'total_notes': len(self.deconstruction_results),
            'total_features': total_features,
            'avg_features_per_note': round(total_features / len(self.deconstruction_results), 1) if self.deconstruction_results else 0,
            'success_rate': round(self.success_count / self.processed_notes, 3) if self.processed_notes > 0 else 0
        }
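

# ---------------------------------------------------------------------------
# Illustrative smoke test: a minimal sketch, not part of the real pipeline.
# All values below are made up (note IDs, scores, match types). It exercises
# the to_dict() / from_json_file() round trip and get_statistics().
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import json
    import os
    import tempfile

    feature = DeconstructedFeature(
        feature_name="example feature",
        dimension="purpose point",
        dimension_detail="intent",
        weight=1.0,
        source_index=0,
    )
    result = DeconstructionResult(
        note_id="note_001",
        search_word="example search",
        original_feature="example feature",
        source_word="example source",
        evaluation_score=0.92,
        evaluation_type="full_match",
        evaluation_confidence="high",
        key_matching_points=["point A"],
        purpose_features=[feature],
        processed_at=datetime.now().isoformat(),
    )
    collection = PostDeconstruction(
        post_id="post_001",
        deconstruction_results=[result],
        total_matched_notes=1,
        processed_notes=1,
        success_count=1,
    )

    # Round trip through a temporary JSON file.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as tmp:
        json.dump(collection.to_dict(), tmp, ensure_ascii=False, indent=2)
        tmp_path = tmp.name
    try:
        loaded = PostDeconstruction.from_json_file(tmp_path)
        print(loaded.get_statistics())
    finally:
        os.remove(tmp_path)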