analyze_model_comparison.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 分析模型对比结果并导出到Excel
  5. 同时分析两种实现:
  6. 1. cache/text_embedding - 向量模型实现(text2vec)
  7. 2. cache/semantic_similarity - LLM实现(GPT/Claude等)
  8. 生成Excel报告,对比不同实现的效果差异。
  9. """
  10. import json
  11. import sys
  12. from pathlib import Path
  13. from typing import Dict, List, Tuple
  14. import pandas as pd
  15. from datetime import datetime
  16. # 添加项目根目录到路径
  17. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  18. from lib.config import get_cache_dir
  19. project_root = Path(__file__).parent.parent.parent
  20. sys.path.insert(0, str(project_root))
  21. def extract_results_from_cache(
  22. cache_dir: str,
  23. model_type: str
  24. ) -> Dict[Tuple[str, str], Dict]:
  25. """
  26. 从缓存目录提取结果
  27. Args:
  28. cache_dir: 缓存目录路径
  29. model_type: 模型类型("text_embedding" 或 "semantic_similarity")
  30. Returns:
  31. 结果字典,键为 (phrase_a, phrase_b) 元组,值为结果数据
  32. """
  33. cache_path = Path(cache_dir)
  34. if not cache_path.exists():
  35. print(f"缓存目录不存在: {cache_dir}")
  36. return {}
  37. results = {}
  38. cache_files = list(cache_path.glob("*.json"))
  39. print(f"扫描 {model_type} 缓存: {len(cache_files)} 个文件")
  40. for cache_file in cache_files:
  41. try:
  42. with open(cache_file, 'r', encoding='utf-8') as f:
  43. data = json.load(f)
  44. # 提取输入和输出
  45. input_data = data.get("input", {})
  46. output_data = data.get("output", {})
  47. phrase_a = input_data.get("phrase_a")
  48. phrase_b = input_data.get("phrase_b")
  49. if phrase_a and phrase_b:
  50. # 统一处理两种缓存格式
  51. if model_type == "text_embedding":
  52. # text_embedding 的输出直接是 {"说明": "...", "相似度": 0.xx}
  53. result = {
  54. "相似度": output_data.get("相似度"),
  55. "说明": output_data.get("说明", ""),
  56. "模型": input_data.get("model_name", "unknown")
  57. }
  58. elif model_type == "semantic_similarity":
  59. # semantic_similarity 的输出在 output.parsed 中
  60. parsed = output_data.get("parsed", output_data)
  61. result = {
  62. "相似度": parsed.get("相似度"),
  63. "说明": parsed.get("说明", ""),
  64. "模型": input_data.get("model_name", "LLM")
  65. }
  66. else:
  67. continue
  68. # 使用原始顺序的元组作为键(保持 phrase_a 和 phrase_b 的原始顺序)
  69. pair_key = (phrase_a, phrase_b)
  70. # 如果同一对短语有多个缓存(不同模型),保存为列表
  71. if pair_key not in results:
  72. results[pair_key] = []
  73. results[pair_key].append({
  74. "phrase_a": phrase_a,
  75. "phrase_b": phrase_b,
  76. "相似度": result["相似度"],
  77. "说明": result["说明"],
  78. "模型": result["模型"]
  79. })
  80. except (json.JSONDecodeError, IOError, KeyError) as e:
  81. print(f" 读取缓存文件失败: {cache_file.name} - {e}")
  82. continue
  83. return results
  84. def merge_all_results(
  85. text_embedding_results: Dict[Tuple[str, str], List[Dict]],
  86. semantic_similarity_results: Dict[Tuple[str, str], List[Dict]]
  87. ) -> List[Dict]:
  88. """
  89. 合并所有结果
  90. Args:
  91. text_embedding_results: 向量模型结果
  92. semantic_similarity_results: LLM模型结果
  93. Returns:
  94. 合并后的结果列表
  95. """
  96. # 获取所有唯一的短语对
  97. all_pairs = set(text_embedding_results.keys()) | set(semantic_similarity_results.keys())
  98. merged = []
  99. for pair_key in all_pairs:
  100. phrase_a, phrase_b = pair_key
  101. row = {
  102. "短语A": phrase_a,
  103. "短语B": phrase_b,
  104. }
  105. # 添加向量模型结果
  106. if pair_key in text_embedding_results:
  107. for result in text_embedding_results[pair_key]:
  108. model_name = result["模型"].split('/')[-1] # 提取模型简称
  109. row[f"向量_{model_name}_相似度"] = result["相似度"]
  110. row[f"向量_{model_name}_说明"] = result["说明"]
  111. # 添加LLM模型结果
  112. if pair_key in semantic_similarity_results:
  113. for result in semantic_similarity_results[pair_key]:
  114. model_name = result["模型"].split('/')[-1] # 提取模型简称
  115. row[f"LLM_{model_name}_相似度"] = result["相似度"]
  116. row[f"LLM_{model_name}_说明"] = result["说明"]
  117. merged.append(row)
  118. return merged
  119. def create_comparison_dataframe(merged_results: List[Dict]) -> pd.DataFrame:
  120. """
  121. 创建模型对比数据框
  122. Args:
  123. merged_results: 合并后的结果列表
  124. Returns:
  125. 包含所有模型对比的DataFrame
  126. """
  127. # 直接从合并结果创建DataFrame
  128. df = pd.DataFrame(merged_results)
  129. # 添加序号
  130. df.insert(0, "序号", range(1, len(df) + 1))
  131. # 只保留相似度列,移除说明列
  132. columns_to_keep = ["序号", "短语A", "短语B"]
  133. similarity_cols = [col for col in df.columns if col.endswith("_相似度")]
  134. columns_to_keep.extend(similarity_cols)
  135. # 过滤出要保留的列
  136. df = df[[col for col in columns_to_keep if col in df.columns]]
  137. # 计算相似度差异
  138. for idx, row in df.iterrows():
  139. similarities = []
  140. for col in similarity_cols:
  141. if col in df.columns:
  142. sim = row[col]
  143. if sim is not None and isinstance(sim, (int, float)):
  144. similarities.append(sim)
  145. if len(similarities) > 1:
  146. df.at[idx, "相似度_差异"] = max(similarities) - min(similarities)
  147. elif len(similarities) == 1:
  148. df.at[idx, "相似度_差异"] = 0
  149. else:
  150. df.at[idx, "相似度_差异"] = None
  151. # 移动相似度差异列到最后
  152. if "相似度_差异" in df.columns:
  153. cols = [col for col in df.columns if col != "相似度_差异"]
  154. cols.append("相似度_差异")
  155. df = df[cols]
  156. return df
  157. def export_to_excel(
  158. comparison_df: pd.DataFrame,
  159. output_file: str
  160. ) -> None:
  161. """
  162. 导出到Excel文件
  163. Args:
  164. comparison_df: 对比数据框
  165. output_file: 输出Excel文件路径
  166. """
  167. output_path = Path(output_file)
  168. output_path.parent.mkdir(parents=True, exist_ok=True)
  169. # 创建Excel写入器
  170. with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
  171. # 写入完整对比数据
  172. comparison_df.to_excel(writer, sheet_name='模型对比', index=False)
  173. # 格式化工作表
  174. workbook = writer.book
  175. ws = writer.sheets['模型对比']
  176. # 自动调整列宽
  177. for col in ws.columns:
  178. max_length = 0
  179. column = col[0].column_letter
  180. for cell in col:
  181. try:
  182. if len(str(cell.value)) > max_length:
  183. max_length = len(str(cell.value))
  184. except:
  185. pass
  186. adjusted_width = min(max_length + 2, 50)
  187. ws.column_dimensions[column].width = adjusted_width
  188. print(f"Excel报告已导出到: {output_file}")
  189. def main():
  190. """主函数"""
  191. # 配置参数(从配置模块获取)
  192. text_embedding_cache = get_cache_dir("text_embedding")
  193. semantic_similarity_cache = get_cache_dir("semantic_similarity")
  194. output_file = f"data/model_comparison_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
  195. print("=" * 60)
  196. print("模型对比结果分析(向量模型 vs LLM模型)")
  197. print("=" * 60)
  198. # 步骤 1: 从缓存提取结果
  199. print("\n步骤 1: 从缓存提取结果...")
  200. print(f" - 向量模型缓存: {text_embedding_cache}")
  201. text_embedding_results = extract_results_from_cache(text_embedding_cache, "text_embedding")
  202. print(f" 提取到 {len(text_embedding_results)} 个唯一短语对")
  203. print(f" - LLM模型缓存: {semantic_similarity_cache}")
  204. semantic_similarity_results = extract_results_from_cache(semantic_similarity_cache, "semantic_similarity")
  205. print(f" 提取到 {len(semantic_similarity_results)} 个唯一短语对")
  206. if not text_embedding_results and not semantic_similarity_results:
  207. print("\n错误: 未找到任何缓存数据")
  208. print("请先运行以下脚本生成缓存:")
  209. print(" - match_inspiration_features.py (生成 text_embedding 缓存)")
  210. print(" - test_all_models.py (生成多模型缓存)")
  211. return
  212. # 步骤 2: 合并结果
  213. print("\n步骤 2: 合并所有模型结果...")
  214. merged_results = merge_all_results(text_embedding_results, semantic_similarity_results)
  215. print(f"合并后共 {len(merged_results)} 个测试用例")
  216. # 步骤 3: 创建对比数据框
  217. print("\n步骤 3: 创建对比数据框...")
  218. comparison_df = create_comparison_dataframe(merged_results)
  219. print(f"对比数据框创建完成,共 {len(comparison_df)} 行")
  220. # 显示列信息
  221. similarity_cols = [col for col in comparison_df.columns if col.endswith("_相似度")]
  222. print(f"包含 {len(similarity_cols)} 个模型的相似度数据:")
  223. for col in similarity_cols:
  224. print(f" - {col}")
  225. # 步骤 4: 导出到Excel
  226. print("\n步骤 4: 导出到Excel...")
  227. export_to_excel(comparison_df, output_file)
  228. print("\n" + "=" * 60)
  229. print("分析完成!")
  230. print(f"报告文件: {output_file}")
  231. print("=" * 60)
# Run the full analysis when invoked as a script.
if __name__ == "__main__":
    main()