1 tydzień temu · d2a2a175ce
--- a/SETUP_ENV.md
+++ b/SETUP_ENV.md
@@ -0,0 +1,141 @@
 
															+# 虚拟环境设置指南
														
 
															+
														
 
															+## 创建虚拟环境
														
 
															+
														
 
															+### 方法1：使用 venv（推荐）
														
 
															+
														
 
															+```bash
														
 
															+# 创建虚拟环境（在当前目录下创建 .venv 文件夹）
														
 
															+python3 -m venv .venv
														
 
															+
														
 
															+# 或者指定名称
														
 
															+python3 -m venv venv
														
 
															+```
														
 
															+
														
 
															+### 方法2：使用 virtualenv
														
 
															+
														
 
															+```bash
														
 
															+# 先安装 virtualenv（如果未安装）
														
 
															+pip install virtualenv
														
 
															+
														
 
															+# 创建虚拟环境
														
 
															+virtualenv venv
														
 
															+```
														
 
															+
														
 
															+## 激活虚拟环境
														
 
															+
														
 
															+### macOS/Linux
														
 
															+
														
 
															+```bash
														
 
															+# 激活虚拟环境
														
 
															+source .venv/bin/activate
														
 
															+
														
 
															+# 或者如果命名为 venv
														
 
															+source venv/bin/activate
														
 
															+```
														
 
															+
														
 
															+激活成功后，终端提示符前会显示 `(venv)` 或 `(.venv)`
														
 
															+
														
 
															+### Windows
														
 
															+
														
 
															+```bash
														
 
															+# 激活虚拟环境
														
 
															+.venv\Scripts\activate
														
 
															+
														
 
															+# 或者如果命名为 venv
														
 
															+venv\Scripts\activate
														
 
															+```
														
 
															+
														
 
															+## 停用虚拟环境
														
 
															+
														
 
															+```bash
														
 
															+deactivate
														
 
															+```
														
 
															+
														
 
															+## 完整设置流程
														
 
															+
														
 
															+```bash
														
 
															+# 1. 创建虚拟环境
														
 
															+python3 -m venv .venv
														
 
															+
														
 
															+# 2. 激活虚拟环境
														
 
															+source .venv/bin/activate
														
 
															+
														
 
															+# 3. 升级pip（可选但推荐）
														
 
															+pip install --upgrade pip
														
 
															+
														
 
															+# 4. 安装项目依赖
														
 
															+pip install -r requirements.txt
														
 
															+
														
 
															+# 5. 设置环境变量
														
 
															+export OPENROUTER_API_KEY='your-api-key-here'
														
 
															+
														
 
															+# 6. 验证安装
														
 
															+python --version
														
 
															+pip list
														
 
															+```
														
 
															+
														
 
															+## 检查虚拟环境是否激活
														
 
															+
														
 
															+```bash
														
 
															+# 方法1：查看Python路径
														
 
															+which python
														
 
															+# 应该显示虚拟环境路径，如：/path/to/project/.venv/bin/python
														
 
															+
														
 
															+# 方法2：查看pip路径
														
 
															+which pip
														
 
															+# 应该显示虚拟环境路径，如：/path/to/project/.venv/bin/pip
														
 
															+
														
 
															+# 方法3：查看环境变量
														
 
															+echo $VIRTUAL_ENV
														
 
															+# 应该显示虚拟环境路径
														
 
															+```
														
 
															+
														
 
															+## 常见问题
														
 
															+
														
 
															+### 1. 找不到 python3 命令
														
 
															+
														
 
															+```bash
														
 
															+# 尝试使用 python
														
 
															+python -m venv .venv
														
 
															+
														
 
															+# 或者查找Python安装路径
														
 
															+which python3
														
 
															+```
														
 
															+
														
 
															+### 2. 权限错误
														
 
															+
														
 
															+```bash
														
 
															+# 使用 sudo（不推荐，但有时需要）
														
 
															+sudo python3 -m venv .venv
														
 
															+```
														
 
															+
														
 
															+### 3. 虚拟环境已存在
														
 
															+
														
 
															+```bash
														
 
															+# 删除旧虚拟环境
														
 
															+rm -rf .venv
														
 
															+
														
 
															+# 重新创建
														
 
															+python3 -m venv .venv
														
 
															+```
														
 
															+
														
 
															+## 项目特定设置
														
 
															+
														
 
															+对于本项目，建议使用 `.venv` 作为虚拟环境名称（已在 .gitignore 中忽略）
														
 
															+
														
 
															+```bash
														
 
															+# 创建虚拟环境
														
 
															+python3 -m venv .venv
														
 
															+
														
 
															+# 激活虚拟环境
														
 
															+source .venv/bin/activate
														
 
															+
														
 
															+# 安装依赖
														
 
															+pip install -r requirements.txt
														
 
															+
														
 
															+# 设置API密钥
														
 
															+export OPENROUTER_API_KEY='your-api-key-here'
														
 
															+```
														
 
															+
														
 
															+
														
--- a/api/config.py
+++ b/api/config.py
@@ -14,7 +14,11 @@ class APIConfig:
 
															     # API服务配置
														
 
															     API_HOST: str = os.getenv("API_HOST", "0.0.0.0")
														
 
															     API_PORT: int = int(os.getenv("API_PORT", "8001"))
														
 
															-    
														
 
															+
														
 
															+    # 并发控制配置
														
 
															+    MAX_CONCURRENT_REQUESTS: int = int(os.getenv("MAX_CONCURRENT_REQUESTS", "5"))
														
 
															+    TEMP_REQUESTS_DIR: str = os.getenv("TEMP_REQUESTS_DIR", "temp_requests")
														
 
															+
														
 
															     # Pipeline配置
														
 
															     OPENROUTER_API_KEY: Optional[str] = os.getenv("OPENROUTER_API_KEY")
														
 
															     OUTPUT_DIR: str = os.getenv("OUTPUT_DIR", "output_v2")
														
--- a/api/pipeline_wrapper.py
+++ b/api/pipeline_wrapper.py
@@ -19,25 +19,30 @@ logger = logging.getLogger(__name__)
 
															 class PipelineWrapper:
														
 
															     """Pipeline包装器，复用阶段3-7"""
														
 
															-    
														
 
															-    def __init__(self):
														
 
															-        """初始化Pipeline包装器"""
														
 
															-        # 创建临时输出目录
														
 
															-        self.temp_output_dir = tempfile.mkdtemp(prefix='api_pipeline_')
														
 
															-        logger.info(f"创建临时输出目录: {self.temp_output_dir}")
														
 
															-        
														
 
															-        # 初始化EnhancedSearchV2实例
														
 
															-        # 注意：how_json_path参数是必需的，但我们不会使用它（因为我们跳过阶段1-2）
														
 
															-        # 创建一个空的临时文件作为占位符
														
 
															-        temp_how_file = os.path.join(self.temp_output_dir, 'temp_how.json')
														
 
															+
														
 
															+    def __init__(self, output_dir: str, request_id: str = None):
														
 
															+        """
														
 
															+        初始化Pipeline包装器
														
 
															+
														
 
															+        Args:
														
 
															+            output_dir: 请求专用的输出目录（由RequestContext提供）
														
 
															+            request_id: 请求ID（用于日志标识）
														
 
															+        """
														
 
															+        self.output_dir = output_dir
														
 
															+        self.request_id = request_id or "unknown"
														
 
															+
														
 
															+        logger.info(f"[{self.request_id}] 初始化Pipeline，输出目录: {output_dir}")
														
 
															+
														
 
															+        # 创建占位符how.json（API模式不需要真实文件）
														
 
															+        temp_how_file = os.path.join(output_dir, 'placeholder_how.json')
														
 
															         with open(temp_how_file, 'w', encoding='utf-8') as f:
														
 
															-            import json
														
 
															             json.dump({'解构结果': {}}, f)
														
 
															-        
														
 
															+
														
 
															+        # 初始化EnhancedSearchV2实例，使用传入的output_dir
														
 
															         self.pipeline = EnhancedSearchV2(
														
 
															             how_json_path=temp_how_file,  # 占位符文件，实际不会使用
														
 
															             openrouter_api_key=APIConfig.OPENROUTER_API_KEY,
														
 
															-            output_dir=self.temp_output_dir,
														
 
															+            output_dir=output_dir,  # 使用独立目录
														
 
															             top_n=10,
														
 
															             max_total_searches=APIConfig.MAX_TOTAL_SEARCHES,
														
 
															             search_max_workers=APIConfig.SEARCH_MAX_WORKERS,
														
@@ -60,22 +65,22 @@ class PipelineWrapper:
 
															             similarity_max_workers=APIConfig.SIMILARITY_MAX_WORKERS,
														
 
															             similarity_min_similarity=APIConfig.SIMILARITY_MIN_SIMILARITY
														
 
															         )
														
 
															-        
														
 
															-        logger.info("Pipeline包装器初始化完成")
														
 
															-    
														
 
															-    async def run_stages_3_to_7(
														
 
															+
														
 
															+        logger.info(f"[{self.request_id}] Pipeline包装器初始化完成")
														
 
															+
														
 
															+    def run_stages_3_to_7_sync(
														
 
															         self,
														
 
															         features_data: List[Dict[str, Any]]
														
 
															     ) -> Dict[str, Any]:
														
 
															         """
														
 
															-        执行阶段3-7的完整流程
														
 
															-        
														
 
															+        执行阶段3-7的完整流程（同步版本，用于在线程池中执行）
														
 
															+
														
 
															         Args:
														
 
															             features_data: 阶段2的输出格式数据（candidate_words.json格式）
														
 
															-        
														
 
															+
														
 
															         Returns:
														
 
															             包含阶段3-7结果的字典
														
 
															-        
														
 
															+
														
 
															         Raises:
														
 
															             Exception: 当任何阶段执行失败时
														
 
															         """
														
@@ -132,19 +137,19 @@ class PipelineWrapper:
 
															             # 阶段7：相似度分析
														
 
															             logger.info("阶段7：相似度分析...")
														
 
															             try:
														
 
															-                # 在异步环境中直接调用run_async而不是run
														
 
															-                similarity_results = await self.pipeline.similarity_analyzer.run_async(
														
 
															+                # 同步版本使用run方法
														
 
															+                similarity_results = self.pipeline.similarity_analyzer.run(
														
 
															                     deep_results,
														
 
															-                    output_path=os.path.join(self.temp_output_dir, "similarity_analysis_results.json")
														
 
															+                    output_path=os.path.join(self.output_dir, "similarity_analysis_results.json")
														
 
															                 )
														
 
															             except Exception as e:
														
 
															                 logger.error(f"阶段7执行失败: {e}", exc_info=True)
														
 
															                 raise Exception(f"相似度分析失败: {str(e)}")
														
 
															-            
														
 
															+
														
 
															             # 重要：similarity_analyzer.run_async会更新文件中的evaluation_results（添加comprehensive_score）
														
 
															             # 需要重新加载更新后的文件，因为内存中的evaluation_results变量还没有被更新
														
 
															             logger.info("重新加载更新后的评估结果（包含comprehensive_score）...")
														
 
															-            evaluated_results_path = os.path.join(self.temp_output_dir, "evaluated_results.json")
														
 
															+            evaluated_results_path = os.path.join(self.output_dir, "evaluated_results.json")
														
 
															             if os.path.exists(evaluated_results_path):
														
 
															                 with open(evaluated_results_path, 'r', encoding='utf-8') as f:
														
 
															                     evaluation_results = json.load(f)
														
@@ -165,14 +170,4 @@ class PipelineWrapper:
 
															         except Exception as e:
														
 
															             logger.error(f"执行阶段3-7失败: {e}", exc_info=True)
														
 
															             raise
														
 
															-    
														
 
															-    def cleanup(self):
														
 
															-        """清理临时文件"""
														
 
															-        try:
														
 
															-            import shutil
														
 
															-            if os.path.exists(self.temp_output_dir):
														
 
															-                shutil.rmtree(self.temp_output_dir)
														
 
															-                logger.info(f"已清理临时目录: {self.temp_output_dir}")
														
 
															-        except Exception as e:
														
 
															-            logger.warning(f"清理临时目录失败: {e}")
														
--- a/api/request_context.py
+++ b/api/request_context.py
@@ -0,0 +1,56 @@
 
															+#!/usr/bin/env python3
														
 
															+# -*- coding: utf-8 -*-
														
 
															+"""
														
 
															+请求上下文管理器
														
 
															+为每个API请求提供独立的临时工作空间
														
 
															+"""
														
 
															+
														
 
															+import uuid
														
 
															+import shutil
														
 
															+from pathlib import Path
														
 
															+import logging
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+
														
 
															+class RequestContextManager:
														
 
															+    """请求级别的临时工作空间管理器"""
														
 
															+
														
 
															+    def __init__(self, base_dir: str = "temp_requests"):
														
 
															+        """
														
 
															+        初始化请求上下文管理器
														
 
															+
														
 
															+        Args:
														
 
															+            base_dir: 临时目录基础路径（项目内相对路径）
														
 
															+        """
														
 
															+        self.base_dir = Path(base_dir)
														
 
															+        self.request_id = str(uuid.uuid4())[:8]  # 短UUID，便于日志查看
														
 
															+        self.work_dir = None
														
 
															+
														
 
															+    async def __aenter__(self):
														
 
															+        """
														
 
															+        创建请求专用工作目录
														
 
															+
														
 
															+        Returns:
														
 
															+            self: 返回自身，提供request_id和work_dir访问
														
 
															+        """
														
 
															+        self.work_dir = self.base_dir / f"request_{self.request_id}"
														
 
															+        self.work_dir.mkdir(parents=True, exist_ok=True)
														
 
															+        logger.info(f"[{self.request_id}] 创建工作目录: {self.work_dir}")
														
 
															+        return self
														
 
															+
														
 
															+    async def __aexit__(self, exc_type, exc_val, exc_tb):
														
 
															+        """
														
 
															+        清理工作目录
														
 
															+
														
 
															+        Args:
														
 
															+            exc_type: 异常类型
														
 
															+            exc_val: 异常值
														
 
															+            exc_tb: 异常追踪
														
 
															+        """
														
 
															+        if self.work_dir and self.work_dir.exists():
														
 
															+            try:
														
 
															+                shutil.rmtree(self.work_dir)
														
 
															+                logger.info(f"[{self.request_id}] 已清理工作目录")
														
 
															+            except Exception as e:
														
 
															+                logger.warning(f"[{self.request_id}] 清理工作目录失败: {e}")
														
--- a/api/search_service.py
+++ b/api/search_service.py
@@ -5,8 +5,10 @@ FastAPI服务主文件
 
															 提供搜索API端点
														
 
															 """
														
 
															+import asyncio
														
 
															 import logging
														
 
															 from typing import List, Dict, Any
														
 
															+from concurrent.futures import ThreadPoolExecutor
														
 
															 from fastapi import FastAPI, HTTPException
														
 
															 from pydantic import BaseModel, Field
														
@@ -15,6 +17,7 @@ from api.data_converter import (
 
															     convert_pipeline_output_to_api_response
														
 
															 )
														
 
															 from api.pipeline_wrapper import PipelineWrapper
														
 
															+from api.request_context import RequestContextManager
														
 
															 logger = logging.getLogger(__name__)
														
@@ -25,8 +28,10 @@ app = FastAPI(
 
															     version="1.0.0"
														
 
															 )
														
 
															-# 全局Pipeline包装器实例
														
 
															-pipeline_wrapper: PipelineWrapper = None
														
 
															+# 并发控制
														
 
															+REQUEST_SEMAPHORE = None
														
 
															+MAX_CONCURRENT_REQUESTS = 5
														
 
															+EXECUTOR = None  # 线程池执行器，用于运行阻塞的pipeline代码
														
 
															 # 请求模型
														
@@ -78,120 +83,132 @@ class SearchResponse(BaseModel):
 
															 @app.on_event("startup")
														
 
															 async def startup_event():
														
 
															-    """应用启动时初始化Pipeline包装器"""
														
 
															-    global pipeline_wrapper
														
 
															-    try:
														
 
															-        pipeline_wrapper = PipelineWrapper()
														
 
															-        logger.info("Pipeline包装器初始化成功")
														
 
															-    except Exception as e:
														
 
															-        logger.error(f"Pipeline包装器初始化失败: {e}", exc_info=True)
														
 
															-        raise
														
 
															+    """应用启动时初始化并发限流器和线程池"""
														
 
															+    global REQUEST_SEMAPHORE, EXECUTOR
														
 
															+    REQUEST_SEMAPHORE = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
														
 
															+    EXECUTOR = ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS)
														
 
															+    logger.info(f"API服务已启动，最大并发请求数: {MAX_CONCURRENT_REQUESTS}")
														
 
															 @app.on_event("shutdown")
														
 
															 async def shutdown_event():
														
 
															     """应用关闭时清理资源"""
														
 
															-    global pipeline_wrapper
														
 
															-    if pipeline_wrapper:
														
 
															-        try:
														
 
															-            pipeline_wrapper.cleanup()
														
 
															-            logger.info("Pipeline包装器清理完成")
														
 
															-        except Exception as e:
														
 
															-            logger.warning(f"Pipeline包装器清理失败: {e}")
														
 
															+    global EXECUTOR
														
 
															+    if EXECUTOR:
														
 
															+        EXECUTOR.shutdown(wait=True)
														
 
															+        logger.info("线程池已关闭")
														
 
															+    logger.info("API服务关闭")
														
 
															 @app.post("/what/search", response_model=SearchResponse)
														
 
															 async def search(request: SearchRequest):
														
 
															     """
														
 
															-    执行搜索和评估
														
 
															-    
														
 
															+    执行搜索和评估（支持并发）
														
 
															+
														
 
															+    并发控制：最多5个请求同时处理，超出的请求会等待
														
 
															+
														
 
															     Args:
														
 
															         request: 搜索请求
														
 
															-    
														
 
															+
														
 
															     Returns:
														
 
															         搜索结果响应
														
 
															-    
														
 
															+
														
 
															     Raises:
														
 
															         HTTPException: 当请求参数无效或处理失败时
														
 
															     """
														
 
															-    try:
														
 
															-        logger.info(f"收到搜索请求: original_target={request.original_target}, "
														
 
															-                   f"persona_features数量={len(request.persona_features)}, "
														
 
															-                   f"candidate_words数量={len(request.candidate_words)}")
														
 
															-        
														
 
															-        # 验证Pipeline包装器是否已初始化
														
 
															-        if pipeline_wrapper is None:
														
 
															-            logger.error("Pipeline包装器未初始化")
														
 
															-            raise HTTPException(status_code=503, detail="Pipeline包装器未初始化，请稍后重试")
														
 
															-        
														
 
															-        # 验证输入参数
														
 
															-        if not request.original_target or not request.original_target.strip():
														
 
															-            raise HTTPException(status_code=400, detail="original_target不能为空")
														
 
															-        
														
 
															-        if not request.persona_features or len(request.persona_features) == 0:
														
 
															-            raise HTTPException(status_code=400, detail="persona_features不能为空")
														
 
															-        
														
 
															-        if not request.candidate_words or len(request.candidate_words) == 0:
														
 
															-            raise HTTPException(status_code=400, detail="candidate_words不能为空")
														
 
															-        
														
 
															-        # 验证persona_features中的persona_feature_name
														
 
															-        for idx, pf in enumerate(request.persona_features):
														
 
															-            if not pf.persona_feature_name or not pf.persona_feature_name.strip():
														
 
															-                raise HTTPException(
														
 
															-                    status_code=400,
														
 
															-                    detail=f"persona_features[{idx}].persona_feature_name不能为空"
														
 
															+    # 步骤1：获取并发许可（限流）
														
 
															+    async with REQUEST_SEMAPHORE:
														
 
															+        # 步骤2：创建请求专用上下文
														
 
															+        async with RequestContextManager(base_dir="temp_requests") as ctx:
														
 
															+            try:
														
 
															+                logger.info(f"[{ctx.request_id}] 收到搜索请求: "
														
 
															+                           f"original_target={request.original_target}, "
														
 
															+                           f"persona_features={len(request.persona_features)}, "
														
 
															+                           f"candidate_words={len(request.candidate_words)}")
														
 
															+
														
 
															+                # 验证输入参数
														
 
															+                if not request.original_target or not request.original_target.strip():
														
 
															+                    raise HTTPException(status_code=400, detail="original_target不能为空")
														
 
															+
														
 
															+                if not request.persona_features or len(request.persona_features) == 0:
														
 
															+                    raise HTTPException(status_code=400, detail="persona_features不能为空")
														
 
															+
														
 
															+                if not request.candidate_words or len(request.candidate_words) == 0:
														
 
															+                    raise HTTPException(status_code=400, detail="candidate_words不能为空")
														
 
															+
														
 
															+                # 验证persona_features中的persona_feature_name
														
 
															+                for idx, pf in enumerate(request.persona_features):
														
 
															+                    if not pf.persona_feature_name or not pf.persona_feature_name.strip():
														
 
															+                        raise HTTPException(
														
 
															+                            status_code=400,
														
 
															+                            detail=f"persona_features[{idx}].persona_feature_name不能为空"
														
 
															+                        )
														
 
															+
														
 
															+                # 步骤3：创建请求专用的Pipeline实例
														
 
															+                pipeline_wrapper = PipelineWrapper(
														
 
															+                    output_dir=str(ctx.work_dir),
														
 
															+                    request_id=ctx.request_id
														
 
															                 )
														
 
															-        
														
 
															-        # 步骤1：将API输入转换为pipeline格式
														
 
															-        logger.info("步骤1：转换API输入格式...")
														
 
															-        try:
														
 
															-            features_data = convert_api_input_to_pipeline_format(
														
 
															-                original_target=request.original_target,
														
 
															-                persona_features=[pf.dict() for pf in request.persona_features],
														
 
															-                candidate_words=request.candidate_words
														
 
															-            )
														
 
															-        except Exception as e:
														
 
															-            logger.error(f"API输入格式转换失败: {e}", exc_info=True)
														
 
															-            raise HTTPException(status_code=400, detail=f"输入格式转换失败: {str(e)}")
														
 
															-        
														
 
															-        if not features_data:
														
 
															-            raise HTTPException(status_code=400, detail="无法构建有效的特征数据，请检查输入参数")
														
 
															-        
														
 
															-        # 步骤2：执行阶段3-7
														
 
															-        logger.info("步骤2：执行阶段3-7...")
														
 
															-        try:
														
 
															-            pipeline_output = await pipeline_wrapper.run_stages_3_to_7(features_data)
														
 
															-        except Exception as e:
														
 
															-            logger.error(f"阶段3-7执行失败: {e}", exc_info=True)
														
 
															-            raise HTTPException(status_code=500, detail=f"Pipeline执行失败: {str(e)}")
														
 
															-        
														
 
															-        # 验证pipeline输出
														
 
															-        if not pipeline_output or 'evaluation_results' not in pipeline_output:
														
 
															-            logger.error("Pipeline输出格式不正确")
														
 
															-            raise HTTPException(status_code=500, detail="Pipeline输出格式不正确")
														
 
															-        
														
 
															-        # 步骤3：将pipeline输出转换为API响应格式
														
 
															-        logger.info("步骤3：转换API输出格式...")
														
 
															-        try:
														
 
															-            response = convert_pipeline_output_to_api_response(
														
 
															-                pipeline_results=pipeline_output['evaluation_results'],
														
 
															-                original_target=request.original_target,
														
 
															-                similarity_results=pipeline_output.get('similarity_results')
														
 
															-            )
														
 
															-        except Exception as e:
														
 
															-            logger.error(f"API输出格式转换失败: {e}", exc_info=True)
														
 
															-            raise HTTPException(status_code=500, detail=f"输出格式转换失败: {str(e)}")
														
 
															-        
														
 
															-        logger.info(f"搜索完成: 找到 {len(response['search_results'])} 个有效结果 "
														
 
															-                   f"(综合得分P > 0)")
														
 
															-        
														
 
															-        return response
														
 
															-        
														
 
															-    except HTTPException:
														
 
															-        raise
														
 
															-    except Exception as e:
														
 
															-        logger.error(f"搜索请求处理失败: {e}", exc_info=True)
														
 
															-        raise HTTPException(status_code=500, detail=f"内部服务器错误: {str(e)}")
														
 
															+
														
 
															+                # 步骤4：转换API输入格式
														
 
															+                logger.info(f"[{ctx.request_id}] 转换API输入格式...")
														
 
															+                try:
														
 
															+                    features_data = convert_api_input_to_pipeline_format(
														
 
															+                        original_target=request.original_target,
														
 
															+                        persona_features=[pf.dict() for pf in request.persona_features],
														
 
															+                        candidate_words=request.candidate_words
														
 
															+                    )
														
 
															+                except Exception as e:
														
 
															+                    logger.error(f"[{ctx.request_id}] API输入格式转换失败: {e}", exc_info=True)
														
 
															+                    raise HTTPException(status_code=400, detail=f"输入格式转换失败: {str(e)}")
														
 
															+
														
 
															+                if not features_data:
														
 
															+                    raise HTTPException(status_code=400, detail="无法构建有效的特征数据")
														
 
															+
														
 
															+                # 步骤5：执行阶段3-7（在线程池中运行以避免阻塞）
														
 
															+                logger.info(f"[{ctx.request_id}] 执行阶段3-7...")
														
 
															+                try:
														
 
															+                    # 获取当前事件循环
														
 
															+                    loop = asyncio.get_event_loop()
														
 
															+
														
 
															+                    # 在线程池中运行阻塞的pipeline代码
														
 
															+                    pipeline_output = await loop.run_in_executor(
														
 
															+                        EXECUTOR,
														
 
															+                        pipeline_wrapper.run_stages_3_to_7_sync,
														
 
															+                        features_data
														
 
															+                    )
														
 
															+                except Exception as e:
														
 
															+                    logger.error(f"[{ctx.request_id}] 阶段3-7执行失败: {e}", exc_info=True)
														
 
															+                    raise HTTPException(status_code=500, detail=f"Pipeline执行失败: {str(e)}")
														
 
															+
														
 
															+                # 验证pipeline输出
														
 
															+                if not pipeline_output or 'evaluation_results' not in pipeline_output:
														
 
															+                    logger.error(f"[{ctx.request_id}] Pipeline输出格式不正确")
														
 
															+                    raise HTTPException(status_code=500, detail="Pipeline输出格式不正确")
														
 
															+
														
 
															+                # 步骤6：转换API输出格式
														
 
															+                logger.info(f"[{ctx.request_id}] 转换API输出格式...")
														
 
															+                try:
														
 
															+                    response = convert_pipeline_output_to_api_response(
														
 
															+                        pipeline_results=pipeline_output['evaluation_results'],
														
 
															+                        original_target=request.original_target,
														
 
															+                        similarity_results=pipeline_output.get('similarity_results')
														
 
															+                    )
														
 
															+                except Exception as e:
														
 
															+                    logger.error(f"[{ctx.request_id}] API输出格式转换失败: {e}", exc_info=True)
														
 
															+                    raise HTTPException(status_code=500, detail=f"输出格式转换失败: {str(e)}")
														
 
															+
														
 
															+                logger.info(f"[{ctx.request_id}] 搜索完成: "
														
 
															+                           f"找到 {len(response['search_results'])} 个有效结果")
														
 
															+
														
 
															+                # 步骤7：返回结果（工作目录会在退出上下文时自动清理）
														
 
															+                return response
														
 
															+
														
 
															+            except HTTPException:
														
 
															+                raise
														
 
															+            except Exception as e:
														
 
															+                logger.error(f"[{ctx.request_id}] 请求失败: {e}", exc_info=True)
														
 
															+                raise HTTPException(status_code=500, detail=f"内部服务器错误: {str(e)}")
														
 
															 @app.get("/health")
														
@@ -199,6 +216,7 @@ async def health_check():
 
															     """健康检查端点"""
														
 
															     return {
														
 
															         "status": "healthy",
														
 
															-        "pipeline_initialized": pipeline_wrapper is not None
														
 
															+        "max_concurrent_requests": MAX_CONCURRENT_REQUESTS,
														
 
															+        "semaphore_initialized": REQUEST_SEMAPHORE is not None
														
 
															     }