# analyze_model_comparison.py (9.6 KB)
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 分析模型对比结果并导出到Excel
  5. 同时分析两种实现:
  6. 1. cache/text_embedding - 向量模型实现(text2vec)
  7. 2. cache/semantic_similarity - LLM实现(GPT/Claude等)
  8. 生成Excel报告,对比不同实现的效果差异。
  9. """
  10. import json
  11. import sys
  12. from pathlib import Path
  13. from typing import Dict, List, Tuple
  14. import pandas as pd
  15. from datetime import datetime
  16. # 添加项目根目录到路径
  17. project_root = Path(__file__).parent.parent.parent
  18. sys.path.insert(0, str(project_root))
  19. def extract_results_from_cache(
  20. cache_dir: str,
  21. model_type: str
  22. ) -> Dict[Tuple[str, str], Dict]:
  23. """
  24. 从缓存目录提取结果
  25. Args:
  26. cache_dir: 缓存目录路径
  27. model_type: 模型类型("text_embedding" 或 "semantic_similarity")
  28. Returns:
  29. 结果字典,键为 (phrase_a, phrase_b) 元组,值为结果数据
  30. """
  31. cache_path = Path(cache_dir)
  32. if not cache_path.exists():
  33. print(f"缓存目录不存在: {cache_dir}")
  34. return {}
  35. results = {}
  36. cache_files = list(cache_path.glob("*.json"))
  37. print(f"扫描 {model_type} 缓存: {len(cache_files)} 个文件")
  38. for cache_file in cache_files:
  39. try:
  40. with open(cache_file, 'r', encoding='utf-8') as f:
  41. data = json.load(f)
  42. # 提取输入和输出
  43. input_data = data.get("input", {})
  44. output_data = data.get("output", {})
  45. phrase_a = input_data.get("phrase_a")
  46. phrase_b = input_data.get("phrase_b")
  47. if phrase_a and phrase_b:
  48. # 统一处理两种缓存格式
  49. if model_type == "text_embedding":
  50. # text_embedding 的输出直接是 {"说明": "...", "相似度": 0.xx}
  51. result = {
  52. "相似度": output_data.get("相似度"),
  53. "说明": output_data.get("说明", ""),
  54. "模型": input_data.get("model_name", "unknown")
  55. }
  56. elif model_type == "semantic_similarity":
  57. # semantic_similarity 的输出在 output.parsed 中
  58. parsed = output_data.get("parsed", output_data)
  59. result = {
  60. "相似度": parsed.get("相似度"),
  61. "说明": parsed.get("说明", ""),
  62. "模型": input_data.get("model_name", "LLM")
  63. }
  64. else:
  65. continue
  66. # 使用原始顺序的元组作为键(保持 phrase_a 和 phrase_b 的原始顺序)
  67. pair_key = (phrase_a, phrase_b)
  68. # 如果同一对短语有多个缓存(不同模型),保存为列表
  69. if pair_key not in results:
  70. results[pair_key] = []
  71. results[pair_key].append({
  72. "phrase_a": phrase_a,
  73. "phrase_b": phrase_b,
  74. "相似度": result["相似度"],
  75. "说明": result["说明"],
  76. "模型": result["模型"]
  77. })
  78. except (json.JSONDecodeError, IOError, KeyError) as e:
  79. print(f" 读取缓存文件失败: {cache_file.name} - {e}")
  80. continue
  81. return results
  82. def merge_all_results(
  83. text_embedding_results: Dict[Tuple[str, str], List[Dict]],
  84. semantic_similarity_results: Dict[Tuple[str, str], List[Dict]]
  85. ) -> List[Dict]:
  86. """
  87. 合并所有结果
  88. Args:
  89. text_embedding_results: 向量模型结果
  90. semantic_similarity_results: LLM模型结果
  91. Returns:
  92. 合并后的结果列表
  93. """
  94. # 获取所有唯一的短语对
  95. all_pairs = set(text_embedding_results.keys()) | set(semantic_similarity_results.keys())
  96. merged = []
  97. for pair_key in all_pairs:
  98. phrase_a, phrase_b = pair_key
  99. row = {
  100. "短语A": phrase_a,
  101. "短语B": phrase_b,
  102. }
  103. # 添加向量模型结果
  104. if pair_key in text_embedding_results:
  105. for result in text_embedding_results[pair_key]:
  106. model_name = result["模型"].split('/')[-1] # 提取模型简称
  107. row[f"向量_{model_name}_相似度"] = result["相似度"]
  108. row[f"向量_{model_name}_说明"] = result["说明"]
  109. # 添加LLM模型结果
  110. if pair_key in semantic_similarity_results:
  111. for result in semantic_similarity_results[pair_key]:
  112. model_name = result["模型"].split('/')[-1] # 提取模型简称
  113. row[f"LLM_{model_name}_相似度"] = result["相似度"]
  114. row[f"LLM_{model_name}_说明"] = result["说明"]
  115. merged.append(row)
  116. return merged
  117. def create_comparison_dataframe(merged_results: List[Dict]) -> pd.DataFrame:
  118. """
  119. 创建模型对比数据框
  120. Args:
  121. merged_results: 合并后的结果列表
  122. Returns:
  123. 包含所有模型对比的DataFrame
  124. """
  125. # 直接从合并结果创建DataFrame
  126. df = pd.DataFrame(merged_results)
  127. # 添加序号
  128. df.insert(0, "序号", range(1, len(df) + 1))
  129. # 只保留相似度列,移除说明列
  130. columns_to_keep = ["序号", "短语A", "短语B"]
  131. similarity_cols = [col for col in df.columns if col.endswith("_相似度")]
  132. columns_to_keep.extend(similarity_cols)
  133. # 过滤出要保留的列
  134. df = df[[col for col in columns_to_keep if col in df.columns]]
  135. # 计算相似度差异
  136. for idx, row in df.iterrows():
  137. similarities = []
  138. for col in similarity_cols:
  139. if col in df.columns:
  140. sim = row[col]
  141. if sim is not None and isinstance(sim, (int, float)):
  142. similarities.append(sim)
  143. if len(similarities) > 1:
  144. df.at[idx, "相似度_差异"] = max(similarities) - min(similarities)
  145. elif len(similarities) == 1:
  146. df.at[idx, "相似度_差异"] = 0
  147. else:
  148. df.at[idx, "相似度_差异"] = None
  149. # 移动相似度差异列到最后
  150. if "相似度_差异" in df.columns:
  151. cols = [col for col in df.columns if col != "相似度_差异"]
  152. cols.append("相似度_差异")
  153. df = df[cols]
  154. return df
  155. def export_to_excel(
  156. comparison_df: pd.DataFrame,
  157. output_file: str
  158. ) -> None:
  159. """
  160. 导出到Excel文件
  161. Args:
  162. comparison_df: 对比数据框
  163. output_file: 输出Excel文件路径
  164. """
  165. output_path = Path(output_file)
  166. output_path.parent.mkdir(parents=True, exist_ok=True)
  167. # 创建Excel写入器
  168. with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
  169. # 写入完整对比数据
  170. comparison_df.to_excel(writer, sheet_name='模型对比', index=False)
  171. # 格式化工作表
  172. workbook = writer.book
  173. ws = writer.sheets['模型对比']
  174. # 自动调整列宽
  175. for col in ws.columns:
  176. max_length = 0
  177. column = col[0].column_letter
  178. for cell in col:
  179. try:
  180. if len(str(cell.value)) > max_length:
  181. max_length = len(str(cell.value))
  182. except:
  183. pass
  184. adjusted_width = min(max_length + 2, 50)
  185. ws.column_dimensions[column].width = adjusted_width
  186. print(f"Excel报告已导出到: {output_file}")
  187. def main():
  188. """主函数"""
  189. # 配置参数
  190. text_embedding_cache = "cache/text_embedding"
  191. semantic_similarity_cache = "cache/semantic_similarity"
  192. output_file = f"data/model_comparison_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
  193. print("=" * 60)
  194. print("模型对比结果分析(向量模型 vs LLM模型)")
  195. print("=" * 60)
  196. # 步骤 1: 从缓存提取结果
  197. print("\n步骤 1: 从缓存提取结果...")
  198. print(f" - 向量模型缓存: {text_embedding_cache}")
  199. text_embedding_results = extract_results_from_cache(text_embedding_cache, "text_embedding")
  200. print(f" 提取到 {len(text_embedding_results)} 个唯一短语对")
  201. print(f" - LLM模型缓存: {semantic_similarity_cache}")
  202. semantic_similarity_results = extract_results_from_cache(semantic_similarity_cache, "semantic_similarity")
  203. print(f" 提取到 {len(semantic_similarity_results)} 个唯一短语对")
  204. if not text_embedding_results and not semantic_similarity_results:
  205. print("\n错误: 未找到任何缓存数据")
  206. print("请先运行以下脚本生成缓存:")
  207. print(" - match_inspiration_features.py (生成 text_embedding 缓存)")
  208. print(" - test_all_models.py (生成多模型缓存)")
  209. return
  210. # 步骤 2: 合并结果
  211. print("\n步骤 2: 合并所有模型结果...")
  212. merged_results = merge_all_results(text_embedding_results, semantic_similarity_results)
  213. print(f"合并后共 {len(merged_results)} 个测试用例")
  214. # 步骤 3: 创建对比数据框
  215. print("\n步骤 3: 创建对比数据框...")
  216. comparison_df = create_comparison_dataframe(merged_results)
  217. print(f"对比数据框创建完成,共 {len(comparison_df)} 行")
  218. # 显示列信息
  219. similarity_cols = [col for col in comparison_df.columns if col.endswith("_相似度")]
  220. print(f"包含 {len(similarity_cols)} 个模型的相似度数据:")
  221. for col in similarity_cols:
  222. print(f" - {col}")
  223. # 步骤 4: 导出到Excel
  224. print("\n步骤 4: 导出到Excel...")
  225. export_to_excel(comparison_df, output_file)
  226. print("\n" + "=" * 60)
  227. print("分析完成!")
  228. print(f"报告文件: {output_file}")
  229. print("=" * 60)
  230. if __name__ == "__main__":
  231. main()