run_enhanced_search.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 增强搜索系统V2 - 快速启动脚本
  5. """
  6. import os
  7. import sys
  8. import logging
  9. from enhanced_search_v2 import EnhancedSearchV2
  10. # 配置日志
  11. logging.basicConfig(
  12. level=logging.INFO,
  13. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  14. handlers=[
  15. logging.FileHandler('search_execution.log', encoding='utf-8'),
  16. logging.StreamHandler()
  17. ]
  18. )
  19. logger = logging.getLogger(__name__)
  20. def check_files_exist():
  21. """检查必需文件是否存在"""
  22. required_files = [
  23. '69114f150000000007001f30_how copy.json',
  24. 'dimension_associations_analysis.json',
  25. 'optimized_clustered_data_gemini-3-pro-preview.json'
  26. ]
  27. missing_files = []
  28. for file in required_files:
  29. if not os.path.exists(file):
  30. missing_files.append(file)
  31. if missing_files:
  32. logger.error("缺少以下必需文件:")
  33. for file in missing_files:
  34. logger.error(f" - {file}")
  35. return False
  36. return True
  37. def check_api_key():
  38. """检查API密钥"""
  39. api_key = os.getenv('OPENROUTER_API_KEY')
  40. if not api_key:
  41. logger.error("未找到OPENROUTER_API_KEY环境变量")
  42. logger.error("请设置环境变量: export OPENROUTER_API_KEY='your_key'")
  43. return False
  44. logger.info(f"已找到API密钥: {api_key[:10]}...")
  45. return True
  46. def main():
  47. """主函数"""
  48. print("=" * 80)
  49. print("增强搜索系统 V2")
  50. print("=" * 80)
  51. print()
  52. # 检查环境
  53. logger.info("检查运行环境...")
  54. if not check_api_key():
  55. logger.error("环境检查失败:API密钥未设置")
  56. sys.exit(1)
  57. if not check_files_exist():
  58. logger.error("环境检查失败:缺少必需文件")
  59. sys.exit(1)
  60. logger.info("环境检查通过 ✓")
  61. print()
  62. # 初始化系统
  63. logger.info("初始化搜索系统...")
  64. try:
  65. system = EnhancedSearchV2(
  66. how_json_path='69114f150000000007001f30_how copy.json',
  67. dimension_associations_path='dimension_associations_analysis.json',
  68. optimized_clustered_data_path='optimized_clustered_data_gemini-3-pro-preview.json',
  69. output_dir='output_v2'
  70. )
  71. logger.info("系统初始化成功 ✓")
  72. print()
  73. except Exception as e:
  74. logger.error(f"系统初始化失败: {e}")
  75. sys.exit(1)
  76. # 运行完整流程
  77. logger.info("=" * 80)
  78. logger.info("开始执行7阶段搜索流程")
  79. logger.info("=" * 80)
  80. print()
  81. try:
  82. final_results = system.run_full_pipeline()
  83. print()
  84. logger.info("=" * 80)
  85. logger.info("搜索流程执行完成!")
  86. logger.info("=" * 80)
  87. print()
  88. # 显示结果摘要
  89. logger.info("输出文件位置: output_v2/")
  90. logger.info("- stage1_filtered_features.json")
  91. logger.info("- stage2_associations.json")
  92. logger.info("- stage3_features.json")
  93. logger.info("- stage4_with_llm_scores.json")
  94. logger.info("- stage5_with_search_results.json")
  95. logger.info("- stage6_with_evaluations.json")
  96. logger.info("- stage7_final_results.json")
  97. print()
  98. # 统计信息
  99. logger.info("执行统计:")
  100. stage1_count = len(final_results.get('结果', []))
  101. logger.info(f"- 筛选出的特征数: {stage1_count}")
  102. # 统计搜索词数量
  103. total_search_words = 0
  104. total_extensions = 0
  105. for result in final_results.get('结果', []):
  106. for assoc in result.get('找到的关联', []):
  107. for feature in assoc.get('特征列表', []):
  108. if feature.get('search_word'):
  109. total_search_words += 1
  110. total_extensions += len(feature.get('extended_searches', []))
  111. logger.info(f"- 生成的搜索词数: {total_search_words}")
  112. logger.info(f"- 扩展搜索数: {total_extensions}")
  113. return 0
  114. except KeyboardInterrupt:
  115. logger.warning("\n用户中断执行")
  116. return 1
  117. except Exception as e:
  118. logger.error(f"\n执行过程中出错: {e}", exc_info=True)
  119. return 1
  120. if __name__ == '__main__':
  121. sys.exit(main())