Procházet zdrojové kódy

video_generate_text

luojunhui před 4 měsíci
rodič
revize
e1a6a8f59b

+ 3 - 1
applications/api/__init__.py

@@ -1,6 +1,8 @@
 """
 @author: luojunhui
 """
+from .deep_seek_by_byte_dance_api import deep_seek_api
+from .google_ai_api import GoogleAIAPI
 from .moon_shot_api import generate_mini_program_title
 from .nlp_api import similarity_between_title_list
-from .google_ai_api import GoogleAIAPI
+

+ 21 - 16
applications/api/deep_seek_by_byte_dance_api.py

@@ -1,22 +1,27 @@
 """
 @author: luojunhui
 """
-api_key = '5e275c38-44fd-415f-abcf-4b59f6377f72'
-
 from openai import OpenAI
 
-client = OpenAI(
-    api_key=api_key,
-    base_url="https://ark.cn-beijing.volces.com/api/v3",
-    )
+from config import deep_seek_model
+from config import deep_seek_default_model
+from config import deep_seek_api_key_byte_dance
 
-chat_completion = client.chat.completions.create(
-    messages=[
-        {
-            "role": "user",
-            "content": "hello"
-        }
-    ],
-    model="",
-)
-response = chat_completion.choices[0].message.content
+
+def deep_seek_api(model, prompt):
+    """
+    调用 DeepSeek 模型并返回回复文本;model 不在 deep_seek_model 中时使用默认模型
+    """
+    client = OpenAI(
+        api_key=deep_seek_api_key_byte_dance,
+        base_url="https://ark.cn-beijing.volces.com/api/v3",
+        # 深度推理模型耗费时间会较长,建议您设置一个较长的超时时间,推荐为30分钟
+        timeout=1800,
+        )
+    response = client.chat.completions.create(
+        model=deep_seek_model.get(model, deep_seek_default_model),
+        messages=[
+            {"role": "user", "content": prompt}
+        ]
+    )
+    return response.choices[0].message.content

+ 22 - 6
coldStartTasks/multi_modal/generate_text_from_video.py

@@ -1,5 +1,6 @@
 """
 @author: luojunhui
+todo: 加上多进程锁
 """
 import os
 import time
@@ -13,11 +14,10 @@ from applications.api import GoogleAIAPI
 from applications.db import DatabaseConnector
 from config import long_articles_config
 
+# 办公室网络调试需要打开代理
 # os.environ["HTTP_PROXY"] = "http://192.168.100.20:1087"
 # os.environ["HTTPS_PROXY"] = "http://192.168.100.20:1087"
 
-PROCESSING_MAX_VIDEO_COUNT = 10
-
 
 def download_file(pq_vid, video_url):
     """
@@ -73,14 +73,19 @@ class GenerateTextFromVideo(object):
         )
         print(affected_rows)
 
-    def upload_video_to_google_ai(self):
+    def upload_video_to_google_ai(self, max_processing_video_count=100):
         """
         上传视频到Google AI
+        max_processing_video_count: 处理中的最大视频数量,默认100
+        video_content_understanding 表status字段
+        0: 未处理
+        1: 处理中
+        2: 处理完成
         """
         # 查询出在视频处于PROCESSING状态的视频数量
         select_sql = "select count(1) as processing_count from video_content_understanding where status = 1;"
         count = self.db.fetch(select_sql, cursor_type=DictCursor)[0]['processing_count']
-        rest_video_count = PROCESSING_MAX_VIDEO_COUNT - count
+        rest_video_count = max_processing_video_count - count
         success_upload_count = 0
         if rest_video_count:
             sql = f"""select pq_vid, video_oss_path from video_content_understanding where status = 0 limit {rest_video_count};"""
@@ -122,6 +127,7 @@ class GenerateTextFromVideo(object):
         while task_list:
             for task in tqdm(task_list, desc="convert video to text"):
                 file_name = task['file_name']
+                video_local_path = "static/{}.mp4".format(task['pq_vid'])
                 google_file = self.google_ai_api.get_google_file(file_name)
                 state = google_file.state.name
                 match state:
@@ -141,7 +147,7 @@ class GenerateTextFromVideo(object):
                                     update_sql,
                                     params=(2, video_text, state, task['pq_vid'])
                                 )
-                                os.remove("static/{}.mp4".format(task['pq_vid']))
+                                os.remove(video_local_path)
                                 tqdm.write("video transform to text success, delete local file, sleep 1 min...")
                                 task_list.remove(task)
                         except Exception as e:
@@ -153,7 +159,17 @@ class GenerateTextFromVideo(object):
                         continue
 
                     case 'FAILED':
-                        tqdm.write("video process failed")
+                        update_sql = f"""
+                            update video_content_understanding
+                            set status = %s, file_state = %s
+                            where pq_vid = %s;
+                        """
+                        self.db.save(
+                            update_sql,
+                            params=(99, state, task['pq_vid'])
+                        )
+                        os.remove(video_local_path)
+                        tqdm.write("video process failed, delete local file")
                         continue
                 time.sleep(10)
 

+ 10 - 1
config/__init__.py

@@ -79,4 +79,13 @@ piaoquan_crawler_config = {
     'password': 'crawler123456@',
     'db': 'piaoquan-crawler',
     'charset': 'utf8mb4'
-}
+}
+
+deep_seek_model = {
+    "DeepSeek-R1": "ep-20250213194143-d8q4t",
+    "DeepSeek-V3": "ep-20250213194558-rrmr2"
+}
+
+deep_seek_default_model = "ep-20250213194558-rrmr2"
+
+deep_seek_api_key_byte_dance = '5e275c38-44fd-415f-abcf-4b59f6377f72'

+ 1 - 0
run_video_extract_text.py

@@ -10,6 +10,7 @@ import threading
 from tqdm import tqdm
 
 from coldStartTasks.multi_modal import GenerateTextFromVideo
+from config import apolloConfig
 
 
 class VideoProcessing: