Просмотр исходного кода

下架一些视频任务,修改 deepseek-provider

luojunhui 11 часов назад
Родитель
Commit
0d4b693f20

+ 31 - 14
applications/api/deep_seek_api_by_volcanoengine.py

@@ -1,26 +1,43 @@
 """
 @author: luojunhui
 """
+from typing import Any, Dict, List, Optional
+
 from volcenginesdkarkruntime import Ark
 
+from applications.utils.common import safe_json_parse
 from config import deep_seek_model
 from config import deep_seek_default_model
 from config import deep_seek_api_key_byte_dance
 
 
-def fetch_deepseek_response(model, prompt):
-    """
-    deep_seek方法
-    """
+def fetch_deepseek_completion(
+    model: str,
+    prompt: str,
+    output_type: str = "text",
+) -> Optional[Dict | List]:
+    input_messages = [{"role": "user", "content": prompt}]
+    kwargs: dict[str, Any] = {
+        "model": deep_seek_model.get(model, deep_seek_default_model),
+        "messages": input_messages,
+    }
+
     client = Ark(
-        api_key=deep_seek_api_key_byte_dance,
-        timeout=1800,
-        max_retries=2,
-        )
-    response = client.chat.completions.create(
-        model=deep_seek_model.get(model, deep_seek_default_model),
-        messages=[
-            {"role": "user", "content": prompt}
-        ]
+        base_url="https://ark.cn-beijing.volces.com/api/v3",
+        api_key=deep_seek_api_key_byte_dance
     )
-    return response.choices[0].message.content
+
+    try:
+        response = client.chat.completions.create(**kwargs)
+        output_text = response.choices[0].message.content
+
+        if output_type == "text":
+            return output_text
+        elif output_type == "json":
+            return safe_json_parse(output_text)
+        else:
+            raise ValueError(f"Invalid output_type: {output_type}")
+
+    except Exception as e:
+        print(f"[ERROR] fetch_deepseek_completion (volcengine) failed: {e}")
+        return None

+ 78 - 0
applications/utils/common.py

@@ -3,6 +3,10 @@
 """
 
 import hashlib
+import json
+import re
+
+from typing import Dict, List, Optional
 
 from datetime import datetime, timezone, date, timedelta
 from requests import RequestException
@@ -183,3 +187,77 @@ def days_remaining_in_month():
     remaining_days = (last_day_of_month - today).days
 
     return remaining_days
+
+
+def safe_json_parse(text: str) -> Optional[Dict | List]:
+    """多层降级解析 JSON:直接解析 → 提取代码块 → 提取 JSON 对象/数组
+
+    模型有时返回 ```json ... ``` 包裹的文本,或文本中夹杂 markdown 前缀/后缀。
+    先尝试直接解析(最常见路径),失败后逐层降级提取。
+    """
+    if not text:
+        return None
+
+    # 降级 1:直接解析
+    try:
+        return json.loads(text)
+    except (json.JSONDecodeError, TypeError):
+        pass
+
+    clean = text.strip()
+
+    # 降级 2:提取最外层 json 代码块 ```json ... ```
+    # 优先匹配带语言标注的,再退到任意 code fence
+    m = re.search(r"```json\s*(.*?)\s*```", clean, re.DOTALL)
+    if m:
+        try:
+            return json.loads(m.group(1))
+        except (json.JSONDecodeError, TypeError):
+            pass
+    else:
+        m = re.search(r"```\s*(.*?)\s*```", clean, re.DOTALL)
+        if m:
+            try:
+                return json.loads(m.group(1))
+            except (json.JSONDecodeError, TypeError):
+                pass
+
+    # 降级 3:在文本中查找第一个完整 JSON 对象 { ... } 或数组 [ ... ]
+    # 逐字符扫描,维护字符串状态机,正确处理内嵌括号和转义引号
+    for bracket_pair in [("{}", "{", "}"), ("[]", "[", "]")]:
+        opener, closer = bracket_pair[1], bracket_pair[2]
+        start = clean.find(opener)
+        if start == -1:
+            continue
+        depth = 0
+        in_string = False
+        escape_next = False
+        for i in range(start, len(clean)):
+            ch = clean[i]
+            if escape_next:
+                escape_next = False
+                continue
+            if ch == "\\":
+                escape_next = True
+                continue
+            if ch == '"' and not escape_next:
+                in_string = not in_string
+                continue
+            if in_string:
+                continue
+            if ch == opener:
+                depth += 1
+            elif ch == closer:
+                depth -= 1
+                if depth == 0:
+                    try:
+                        return json.loads(clean[start : i + 1])
+                    except (json.JSONDecodeError, TypeError):
+                        return None
+        # 数组或对象未闭合时也尝试下
+        try:
+            return json.loads(clean[start:])
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+    return None

+ 4 - 4
config/__init__.py

@@ -100,13 +100,13 @@ deep_seek_official_model = {
 
 # deepseek volcano engine
 deep_seek_model = {
-    "DeepSeek-R1": "ep-20250213194143-d8q4t",
-    "DeepSeek-V3": "ep-20250213194558-rrmr2"
+    "DeepSeek-R1": "ep-20260625202225-w4vkd",
+    "DeepSeek-V3": "ep-20260625202225-w4vkd"
 }
 
-deep_seek_default_model = "ep-20250213194558-rrmr2"
+deep_seek_default_model = "ep-20260625202225-w4vkd"
 
-deep_seek_api_key_byte_dance = '5e275c38-44fd-415f-abcf-4b59f6377f72'
+deep_seek_api_key_byte_dance = 'ark-0d5dd17a-35a1-48dc-817a-f2f69c743acb-701ce'
 
 #GeWe
 gewe_token = "d3fb918f-0f36-4769-b095-410181614231"

+ 3 - 2
tasks/ai_tasks/category_generation_task.py

@@ -12,6 +12,7 @@ from tqdm import tqdm
 
 from applications import log
 from applications.api.deep_seek_api_official import fetch_deepseek_completion
+from applications.api.deep_seek_api_by_volcanoengine import fetch_deepseek_response
 from applications.const import CategoryGenerationTaskConst
 from applications.db import DatabaseConnector
 from applications.utils import yield_batch
@@ -345,7 +346,7 @@ class ArticlePoolCategoryGenerationTask(CategoryGenerationTask):
 
         prompt = category_generation_from_title(title_batch)
         try:
-            completion = fetch_deepseek_completion(
+            completion = fetch_deepseek_response(
                 model="DeepSeek-V3", prompt=prompt, output_type="json"
             )
             self.update_title_category(thread_db_client, article_id, completion)
@@ -378,7 +379,7 @@ class ArticlePoolCategoryGenerationTask(CategoryGenerationTask):
             prompt = category_generation_from_title(title_batch)
 
             try:
-                completion = fetch_deepseek_completion(
+                completion = fetch_deepseek_response(
                     model="DeepSeek-V3", prompt=prompt, output_type="json"
                 )
             except Exception as e:

+ 6 - 6
title_process_task.py

@@ -7,13 +7,13 @@ from tasks.ai_tasks.title_rewrite_task import TitleRewriteTask
 
 
 if __name__ == '__main__':
-    # 1. 标题重写
-    title_rewrite_task = TitleRewriteTask()
-    title_rewrite_task.deal()
+    # # 1. 标题重写
+    # title_rewrite_task = TitleRewriteTask()
+    # title_rewrite_task.deal()
 
-    # 2. 视频内容池标题分类
-    video_pool_category_generation_task = VideoPoolCategoryGenerationTask()
-    video_pool_category_generation_task.deal()
+    # # 2. 视频内容池标题分类
+    # video_pool_category_generation_task = VideoPoolCategoryGenerationTask()
+    # video_pool_category_generation_task.deal()
 
     # 3. 文章内容池标题分类
     article_pool_category_generation_task = ArticlePoolCategoryGenerationTask()