| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 增强搜索系统V2 - 快速启动脚本
- """
- import os
- import sys
- import logging
- from enhanced_search_v2 import EnhancedSearchV2
# Logging setup: every record goes both to a UTF-8 encoded log file and to
# the console, using one shared timestamped format.
_file_handler = logging.FileHandler('search_execution.log', encoding='utf-8')
_console_handler = logging.StreamHandler()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[_file_handler, _console_handler],
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
def check_files_exist():
    """Verify that every input file required by the pipeline is present.

    The file names are checked relative to the current working directory.
    When one or more are missing, a header line and one line per missing
    file are logged at ERROR level.

    Returns:
        bool: True when all required files exist, False otherwise.
    """
    expected = (
        '690d977d0000000007036331_how.json',
        'dimension_associations_analysis.json',
        'optimized_clustered_data_gemini-3-pro-preview.json',
    )
    absent = [path for path in expected if not os.path.exists(path)]

    # Nothing missing: the environment is fine.
    if not absent:
        return True

    logger.error("缺少以下必需文件:")
    for path in absent:
        logger.error(f" - {path}")
    return False
def check_api_key():
    """Check that the OpenRouter API key is available in the environment.

    Reads the ``OPENROUTER_API_KEY`` environment variable. A missing, empty
    or whitespace-only value is treated as "not set"; in that case two
    ERROR lines are logged (the problem and how to fix it).

    No part of the key is ever written to the log: log output may be
    persisted to disk, so even a short prefix of the secret would leak
    credential material.

    Returns:
        bool: True when a usable key is set, False otherwise.
    """
    api_key = os.getenv('OPENROUTER_API_KEY')

    # A whitespace-only value is as unusable as a missing one.
    if not api_key or not api_key.strip():
        logger.error("未找到OPENROUTER_API_KEY环境变量")
        logger.error("请设置环境变量: export OPENROUTER_API_KEY='your_key'")
        return False

    # Report presence only; never echo any characters of the secret.
    logger.info("已找到API密钥(内容已隐藏)")
    return True
def main():
    """Run the Enhanced Search V2 pipeline from start to finish.

    Steps, in order:
      1. Print a banner.
      2. Check the environment (API key, then required input files).
      3. Construct ``EnhancedSearchV2`` with the fixed input/output paths.
      4. Call ``run_full_pipeline()`` and log the output file names plus
         summary statistics computed from the returned results.

    Returns:
        int: 0 on success; 1 if the pipeline run is interrupted by the user
        or raises an exception.

    Note:
        A failed environment check or a failed system initialization
        terminates the process through ``sys.exit(1)`` instead of returning.
    """
    rule = "=" * 80
    print(rule)
    print("增强搜索系统 V2")
    print(rule)
    print()

    # Environment checks: run in order and stop at the first failure.
    logger.info("检查运行环境...")
    preflight = (
        (check_api_key, "环境检查失败:API密钥未设置"),
        (check_files_exist, "环境检查失败:缺少必需文件"),
    )
    for check, failure_message in preflight:
        if not check():
            logger.error(failure_message)
            sys.exit(1)
    logger.info("环境检查通过 ✓")
    print()

    # Initialize the search system.
    logger.info("初始化搜索系统...")
    init_kwargs = {
        'how_json_path': '690d977d0000000007036331_how.json',
        'dimension_associations_path': 'dimension_associations_analysis.json',
        'optimized_clustered_data_path': 'optimized_clustered_data_gemini-3-pro-preview.json',
        'output_dir': 'output_v2',
    }
    try:
        system = EnhancedSearchV2(**init_kwargs)
        logger.info("系统初始化成功 ✓")
        print()
    except Exception as exc:
        logger.error(f"系统初始化失败: {exc}")
        sys.exit(1)

    # Run the full pipeline.
    logger.info(rule)
    logger.info("开始执行7阶段搜索流程")
    logger.info(rule)
    print()
    try:
        final_results = system.run_full_pipeline()
        print()
        logger.info(rule)
        logger.info("搜索流程执行完成!")
        logger.info(rule)
        print()

        # Result summary: list the files logged as pipeline outputs.
        logger.info("输出文件位置: output_v2/")
        output_lines = (
            "- stage1_filtered_features.json",
            "- stage2_associations.json",
            "- stage3_features.json",
            "- stage4_with_llm_scores.json",
            "- stage5_with_search_results.json",
            "- stage6_with_evaluations.json",
            "- stage7_final_results.json",
        )
        for line in output_lines:
            logger.info(line)
        print()

        # Statistics.
        logger.info("执行统计:")
        results = final_results.get('结果', [])
        logger.info(f"- 筛选出的特征数: {len(results)}")

        # Walk result -> association -> feature; only features that carry a
        # search word contribute to either counter.
        features = (
            feature
            for result in results
            for assoc in result.get('找到的关联', [])
            for feature in assoc.get('特征列表', [])
        )
        search_word_total = 0
        extension_total = 0
        for feature in features:
            if not feature.get('search_word'):
                continue
            search_word_total += 1
            extension_total += len(feature.get('extended_searches', []))

        logger.info(f"- 生成的搜索词数: {search_word_total}")
        logger.info(f"- 扩展搜索数: {extension_total}")
        return 0
    except KeyboardInterrupt:
        logger.warning("\n用户中断执行")
        return 1
    except Exception as exc:
        # Top-level boundary: log with traceback and report failure.
        logger.error(f"\n执行过程中出错: {exc}", exc_info=True)
        return 1
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
|