jihuaqiang 2 settimane fa
parent
commit
0901d408ae
1 file modificato con 39 aggiunte e 6 eliminazioni
  1. 39 6
      tools/indentify/image_identifier.py

+ 39 - 6
tools/indentify/image_identifier.py

@@ -16,11 +16,15 @@ from PIL import Image
 import requests
 from io import BytesIO
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from utils.logging_config import get_logger
 
 # 导入自定义模块
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from llm.openrouter import OpenRouterProcessor, OpenRouterModel
 
+# 创建 logger
+logger = get_logger('ImageIdentifier')
+
  # 构建OCR提示词
 prompt = """
 #### 人设
@@ -50,11 +54,21 @@ class ImageIdentifier:
         """确保Gemini已配置"""
         if not self._configured:
             self.api_key = os.getenv('GEMINI_API_KEY')
-            print(f"配置Gemini: {self.api_key}")
+            
             if not self.api_key:
                 raise ValueError("请在环境变量中设置 GEMINI_API_KEY")
             genai.configure(api_key=self.api_key)
-            self.model = genai.GenerativeModel('gemini-2.5-flash')
+            
+            # 创建模型时设置安全设置,避免内容被过滤
+            self.model = genai.GenerativeModel(
+                'gemini-2.5-flash',
+                safety_settings={
+                    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+                    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+                    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
+                    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
+                }
+            )
             self._configured = True
     
     def download_image(self, image_url: str) -> Optional[Image.Image]:
@@ -101,12 +115,31 @@ class ImageIdentifier:
 
                     # 使用 Gemini 直接分析图片
                     self._ensure_configured()
+                    logger.info("配置Gemini: API key 已设置")  # never write the secret itself to the log
                     response = self.model.generate_content([system_prompt, image])
                     
-                    if response.text:
-                        return {"idx": idx, "url": url, "content": response.text, "success": True}
-                    else:
-                        return {"idx": idx, "url": url, "content": "", "success": False, "error": "识别失败或无内容返回"}
+                    # Inspect the first candidate's finish_reason (FinishReason: 1=STOP, 2=MAX_TOKENS, 3=SAFETY, 4=RECITATION, 5=OTHER)
+                    if response.candidates:
+                        candidate = response.candidates[0]
+                        if candidate.finish_reason == 3:  # SAFETY
+                            logger.warning(f"图片 {url} 被安全过滤器阻止")
+                            return {"idx": idx, "url": url, "content": "", "success": False, "error": "内容被安全过滤器阻止"}
+                        elif candidate.finish_reason == 4:  # RECITATION
+                            logger.warning(f"图片 {url} 被引用过滤器阻止")
+                            return {"idx": idx, "url": url, "content": "", "success": False, "error": "内容被引用过滤器阻止"}
+                        elif candidate.finish_reason == 5:  # OTHER
+                            logger.warning(f"图片 {url} 被其他原因阻止")
+                            return {"idx": idx, "url": url, "content": "", "success": False, "error": "内容被其他原因阻止"}
+                    
+                    # Reading response.text may raise when no usable part was returned, so the access is guarded
+                    try:
+                        if response.text:
+                            return {"idx": idx, "url": url, "content": response.text, "success": True}
+                        else:
+                            return {"idx": idx, "url": url, "content": "", "success": False, "error": "识别失败或无内容返回"}
+                    except Exception as text_error:
+                        logger.error(f"获取响应文本失败: {text_error}")
+                        return {"idx": idx, "url": url, "content": "", "success": False, "error": f"获取响应文本失败: {str(text_error)}"}
                         
                 except Exception as e:
                     return {"idx": idx, "url": url, "content": "", "success": False, "error": str(e)}