ソースを参照

使用新增的 Gemini API key

jihuaqiang 2 週間前
コミット
5a0a8e4546
4 ファイル変更、57 行追加、24 行削除
  1. 9 1
      .env
  2. 31 6
      agent.py
  3. 9 3
      tools/indentify/indentify.py
  4. 8 14
      tools/indentify/video_identifier.py

+ 9 - 1
.env

@@ -11,7 +11,15 @@ COZE_API_KEY=pat_pClXS15hyuqohC9TK58vU7130Hp6QmmHlnyW2TjFpKVWKsW2B1VniFwdXkY3eRN
 COZE_BOT_ID=7537570163895812146
 
 # Gemini
-GEMINI_API_KEY=AIzaSyAkt1l9Kw1CQgHFzTpla0vgt0OE53fr-BI
+GEMINI_API_KEY_1=AIzaSyAkt1l9Kw1CQgHFzTpla0vgt0OE53fr-BI
+GEMINI_API_KEY_2=AIzaSyAkt1l9Kw1CQgHFzTpla0vgt0OE53fr-BI
+GEMINI_API_KEY_3=AIzaSyCl_xx7oJiA-lIIq56l_Fvxg-XE3c9M4gg
+GEMINI_API_KEY_4=AIzaSyAgNBEBTbOymx8625KrmVSli0-V6Bumf_0
+GEMINI_API_KEY_5=AIzaSyAJ1-83oNw9zKlazyijsATrJDRQYgu7yBU
+GEMINI_API_KEY_6=AIzaSyBPjb7sVZXUT7rFp8Awxnx5L-_xfxTchgw
+GEMINI_API_KEY_7=AIzaSyBLlzG_XRVm-830eb4rvI3GZ-IST1q6JUI
+
+
 
 # DeepSeek 阿里云
 DEEPSEEK_API_KEY=sk-35504b23097f4662899638869c2a63b3

+ 31 - 6
agent.py

@@ -370,8 +370,12 @@ RUNNING_LOCK = asyncio.Lock()
 
 def process_single_item(args):
     """处理单个数据项的函数,用于多进程 (模块级,便于pickle)"""
-    idx, item, request_id = args
+    idx, item, request_id, api_key = args
     try:
+        # 临时设置环境变量以使用指定的API密钥
+        original_api_key = os.getenv('GEMINI_API_KEY')
+        os.environ['GEMINI_API_KEY'] = api_key
+        
         crawl_data = item.get('crawl_data') or {}
         content_id = item.get('content_id') or ''
         task_id = item.get('task_id') or ''
@@ -461,6 +465,13 @@ def process_single_item(args):
             "status": 3,
             "success": False
         }
+    finally:
+        # 恢复原始API密钥
+        if 'original_api_key' in locals():
+            if original_api_key is not None:
+                os.environ['GEMINI_API_KEY'] = original_api_key
+            else:
+                os.environ.pop('GEMINI_API_KEY', None)
 
 
 def create_langgraph_workflow():
@@ -503,10 +514,24 @@ def create_langgraph_workflow():
                 state["status"] = "completed"
                 return state
             
-            # 准备多进程参数
-            process_args = [(idx, item, state["request_id"]) for idx, item in enumerate(items, start=1)]
+            # 获取7个不同的GEMINI API密钥
+            api_keys = []
+            for i in range(1, 8):  # GEMINI_API_KEY_1 到 GEMINI_API_KEY_7
+                api_key = os.getenv(f'GEMINI_API_KEY_{i}')
+                if api_key:
+                    api_keys.append(api_key)
+                else:
+                    logger.warning(f"未找到 GEMINI_API_KEY_{i},使用默认 GEMINI_API_KEY")
+                    api_keys.append(os.getenv('GEMINI_API_KEY'))
+            
+            # 准备多进程参数,为每个任务分配API密钥
+            process_args = []
+            for idx, item in enumerate(items, start=1):
+                # 循环使用7个API密钥
+                api_key = api_keys[(idx - 1) % 7]
+                process_args.append((idx, item, state["request_id"], api_key))
             
-            # 使用3个进程并行处理,添加多进程保护
+            # 使用7个进程并行处理,添加多进程保护
             if __name__ == '__main__' or multiprocessing.current_process().name == 'MainProcess':
                 # 设置多进程启动方法为 'spawn' 以避免 gRPC fork 问题
                 original_start_method = multiprocessing.get_start_method()
@@ -515,8 +540,8 @@ def create_langgraph_workflow():
                 except RuntimeError:
                     pass  # 如果已经设置过,忽略错误
                 
-                with multiprocessing.Pool(processes=2) as pool:
-                    logger.info(f"开始多进程处理: 数量={len(process_args)}")
+                with multiprocessing.Pool(processes=7) as pool:
+                    logger.info(f"开始多进程处理: 数量={len(process_args)}, 使用7个进程")
                     results = pool.map(process_single_item, process_args)
                 
                 # 恢复原始启动方法

+ 9 - 3
tools/indentify/indentify.py

@@ -38,9 +38,9 @@ class ContentIdentifier:
         # 初始化数据库连接
         self.db = MysqlHelper()
         
-        # 初始化识别模块
-        self.image_identifier = ImageIdentifier()
-        self.video_identifier = VideoIdentifier()
+        # 延迟初始化识别模块,确保在需要时使用正确的环境变量
+        self.image_identifier = None
+        self.video_identifier = None
     
 
     def get_unprocessed_record(self) -> Optional[Dict[str, Any]]:
@@ -117,6 +117,12 @@ class ContentIdentifier:
         """处理内容识别,调用独立的识别模块"""
         self.logger.info("开始内容识别处理...")
         
+        # 延迟初始化识别模块,确保使用正确的环境变量
+        if self.image_identifier is None:
+            self.image_identifier = ImageIdentifier()
+        if self.video_identifier is None:
+            self.video_identifier = VideoIdentifier()
+        
         # 图片识别
         image_result = self.image_identifier.process_images(formatted_content)
         

+ 8 - 14
tools/indentify/video_identifier.py

@@ -16,7 +16,6 @@ import uuid
 import requests
 from typing import Dict, Any, List, Optional
 from dotenv import load_dotenv
-from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # 导入自定义模块
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
@@ -511,15 +510,10 @@ class VideoIdentifier:
                         except Exception:
                             pass
 
-            # 并发处理所有视频(每个线程完成完整流程)
-            with ThreadPoolExecutor(max_workers=5) as pool:
-                future_to_item = {pool.submit(complete_video_job, item): item for item in video_data}
-                
-                for future in as_completed(list(future_to_item.keys())):
-                    result = future.result()
-                    url = result['url']
-                    idx = url_to_index[url]
-                    results[idx] = result
+            # 单独遍历处理所有视频
+            for i, item in enumerate(video_data):
+                result = complete_video_job(item)
+                results[i] = result
 
             return results
 
@@ -542,10 +536,10 @@ def main():
                 "video_url": "http://rescdn.yishihui.com/pipeline/video/489e7c31-4e7c-44cc-872d-b1b1dd42b12d.mp4",
                 "video_duration": 187
             },
-            # {
-            #     "video_url": "http://temp.yishihui.com/pipeline/video/43d11b20-6273-4ece-a146-94f63a3992a8.mp4",
-            #     "video_duration": 100
-            # },
+            {
+                "video_url": "http://temp.yishihui.com/pipeline/video/43d11b20-6273-4ece-a146-94f63a3992a8.mp4",
+                "video_duration": 100
+            },
             # {
             #     "video_url": "http://temp.yishihui.com/longvideo/transcode/video/vpc/20250731/57463792ND5eu5PAj95sVLi2gB.mp4",
             #     "video_duration": 100