|
|
@@ -498,6 +498,42 @@ class Stage7DeconstructionAnalyzer:
|
|
|
logger.info(f" 成功: {success_count}")
|
|
|
logger.info(f" 失败: {failed_count}")
|
|
|
|
|
|
+ # 6.5. 加载已有结果(如果存在)并合并
|
|
|
+ existing_results = []
|
|
|
+ if os.path.exists(output_path):
|
|
|
+ logger.info(f"\n 检测到已有结果文件,准备合并...")
|
|
|
+ try:
|
|
|
+ with open(output_path, 'r', encoding='utf-8') as f:
|
|
|
+ existing_data = json.load(f)
|
|
|
+ existing_results = existing_data.get('results', [])
|
|
|
+ logger.info(f" 已有结果数: {len(existing_results)}")
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(f" 加载已有结果失败: {e},将覆盖写入")
|
|
|
+ existing_results = []
|
|
|
+
|
|
|
+ # 6.6. 合并新旧结果(基于 note_id 去重)
|
|
|
+ if existing_results:
|
|
|
+ # 建立已有结果的 note_id 索引
|
|
|
+ existing_note_ids = {r['note_id']: r for r in existing_results}
|
|
|
+
|
|
|
+ # 统计更新数量
|
|
|
+ updated_count = 0
|
|
|
+ for new_result in results:
|
|
|
+ if new_result['note_id'] in existing_note_ids:
|
|
|
+ updated_count += 1
|
|
|
+ # 用新结果更新已有结果(新结果优先)
|
|
|
+ existing_note_ids[new_result['note_id']] = new_result
|
|
|
+
|
|
|
+ # 合并后的完整结果
|
|
|
+ merged_results = list(existing_note_ids.values())
|
|
|
+
|
|
|
+ logger.info(f" 合并后总结果数: {len(merged_results)}")
|
|
|
+ logger.info(f" 本次新增: {len(results) - updated_count} 条")
|
|
|
+ logger.info(f" 本次更新: {updated_count} 条")
|
|
|
+ else:
|
|
|
+ merged_results = results
|
|
|
+ logger.info(f" 无已有结果,直接保存")
|
|
|
+
|
|
|
# 7. 构建最终结果
|
|
|
final_result = {
|
|
|
'metadata': {
|
|
|
@@ -506,6 +542,8 @@ class Stage7DeconstructionAnalyzer:
|
|
|
'target_features': self.target_features if self.target_features else '全部',
|
|
|
'total_matched_notes': total_matched,
|
|
|
'processed_notes': len(results),
|
|
|
+ 'total_results_count': len(merged_results),
|
|
|
+ 'new_results_count': len(results),
|
|
|
'skipped_notes': self.skip_count,
|
|
|
'max_notes_limit': self.max_notes,
|
|
|
'sort_by': self.sort_by,
|
|
|
@@ -516,7 +554,7 @@ class Stage7DeconstructionAnalyzer:
|
|
|
'created_at': datetime.now().isoformat(),
|
|
|
'processing_time_seconds': round(processing_time, 2)
|
|
|
},
|
|
|
- 'results': results
|
|
|
+ 'results': merged_results
|
|
|
}
|
|
|
|
|
|
# 8. 保存结果
|