#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Enhanced Search System V2 - quick-start script.

Checks the runtime environment (API key and required input files), builds an
``EnhancedSearchV2`` instance, runs its full pipeline and logs a short summary
of the returned results.
"""

import os
import sys
import logging
from typing import Iterable, Optional, Tuple

from enhanced_search_v2 import EnhancedSearchV2

# Configure logging: every record goes both to a UTF-8 log file and to the
# console stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('search_execution.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)

# Single source of truth for the paths used by both the pre-flight check and
# the system initialisation, so the two can never drift apart.
HOW_JSON_PATH = '69114f150000000007001f30_how copy.json'
DIMENSION_ASSOCIATIONS_PATH = 'dimension_associations_analysis.json'
OPTIMIZED_CLUSTERED_DATA_PATH = 'optimized_clustered_data_gemini-3-pro-preview.json'
OUTPUT_DIR = 'output_v2'

REQUIRED_FILES = (
    HOW_JSON_PATH,
    DIMENSION_ASSOCIATIONS_PATH,
    OPTIMIZED_CLUSTERED_DATA_PATH,
)

# File names listed in the end-of-run summary.
STAGE_OUTPUT_FILES = (
    'stage1_filtered_features.json',
    'stage2_associations.json',
    'stage3_features.json',
    'stage4_with_llm_scores.json',
    'stage5_with_search_results.json',
    'stage6_with_evaluations.json',
    'stage7_final_results.json',
)


def _mask_secret(secret: str, visible: int = 4) -> str:
    """Return a log-safe rendering of *secret*.

    Only the first *visible* characters are kept, and only when the secret is
    long enough that this prefix is a small fraction of it; shorter secrets
    are masked entirely.
    """
    if len(secret) <= visible * 4:
        return '***'
    return f"{secret[:visible]}***"


def check_files_exist(required_files: Optional[Iterable[str]] = None) -> bool:
    """Check that every required input file exists.

    Args:
        required_files: Paths to check. Defaults to ``REQUIRED_FILES``.

    Returns:
        True when all paths exist; otherwise False, after logging each
        missing path at ERROR level.
    """
    candidates = REQUIRED_FILES if required_files is None else required_files
    missing_files = [path for path in candidates if not os.path.exists(path)]

    if missing_files:
        logger.error("缺少以下必需文件:")
        for path in missing_files:
            logger.error(f" - {path}")
        return False

    return True


def check_api_key() -> bool:
    """Check that the ``OPENROUTER_API_KEY`` environment variable is set.

    Returns:
        True when a non-blank key is present, False otherwise (after logging
        how to set it).
    """
    # A whitespace-only value is as unusable as a missing one.
    api_key = (os.getenv('OPENROUTER_API_KEY') or '').strip()
    if not api_key:
        logger.error("未找到OPENROUTER_API_KEY环境变量")
        logger.error("请设置环境变量: export OPENROUTER_API_KEY='your_key'")
        return False

    # Never write key material to the log file: log a masked form only.
    logger.info("已找到API密钥: %s", _mask_secret(api_key))
    return True


def _summarize_results(final_results: Optional[dict]) -> Tuple[int, int, int]:
    """Count features, search words and extended searches in the results.

    Missing or ``None`` containers are treated as empty so that a pipeline
    which completed is not reported as failed merely because the summary
    could not be computed.

    Returns:
        ``(feature_count, search_word_count, extension_count)``.
    """
    results = (final_results or {}).get('结果') or []

    total_search_words = 0
    total_extensions = 0
    for result in results:
        for assoc in result.get('找到的关联') or []:
            for feature in assoc.get('特征列表') or []:
                if feature.get('search_word'):
                    total_search_words += 1
                # NOTE(review): the original indentation was lost; extensions
                # are counted for every feature here, not only for features
                # that have a search word - confirm against the intended
                # behaviour.
                total_extensions += len(feature.get('extended_searches') or [])

    return len(results), total_search_words, total_extensions


def main() -> int:
    """Run the environment checks and the full search pipeline.

    Returns:
        0 on success, 1 when the pipeline raises or is interrupted by the
        user.

    Raises:
        SystemExit: with code 1 when the environment check or the system
            initialisation fails.
    """
    print("=" * 80)
    print("增强搜索系统 V2")
    print("=" * 80)
    print()

    # Environment checks.
    logger.info("检查运行环境...")

    if not check_api_key():
        logger.error("环境检查失败:API密钥未设置")
        sys.exit(1)

    if not check_files_exist():
        logger.error("环境检查失败:缺少必需文件")
        sys.exit(1)

    logger.info("环境检查通过 ✓")
    print()

    # System initialisation.
    logger.info("初始化搜索系统...")
    try:
        system = EnhancedSearchV2(
            how_json_path=HOW_JSON_PATH,
            dimension_associations_path=DIMENSION_ASSOCIATIONS_PATH,
            optimized_clustered_data_path=OPTIMIZED_CLUSTERED_DATA_PATH,
            output_dir=OUTPUT_DIR
        )
        logger.info("系统初始化成功 ✓")
        print()
    except Exception as e:
        # Include the traceback, as the pipeline error handler below does.
        logger.error(f"系统初始化失败: {e}", exc_info=True)
        sys.exit(1)

    # Full pipeline run.
    logger.info("=" * 80)
    logger.info("开始执行7阶段搜索流程")
    logger.info("=" * 80)
    print()

    try:
        final_results = system.run_full_pipeline()

        print()
        logger.info("=" * 80)
        logger.info("搜索流程执行完成!")
        logger.info("=" * 80)
        print()

        # Result summary: where the output files are.
        logger.info(f"输出文件位置: {OUTPUT_DIR}/")
        for stage_file in STAGE_OUTPUT_FILES:
            logger.info(f"- {stage_file}")
        print()

        # Statistics.
        logger.info("执行统计:")
        feature_count, search_word_count, extension_count = _summarize_results(
            final_results
        )
        logger.info(f"- 筛选出的特征数: {feature_count}")
        logger.info(f"- 生成的搜索词数: {search_word_count}")
        logger.info(f"- 扩展搜索数: {extension_count}")

        return 0

    except KeyboardInterrupt:
        logger.warning("\n用户中断执行")
        return 1
    except Exception as e:
        logger.error(f"\n执行过程中出错: {e}", exc_info=True)
        return 1


if __name__ == '__main__':
    sys.exit(main())