Bläddra i källkod

develop baidu video downloader

luojunhui 3 månader sedan
förälder
incheckning
55465c7751

+ 2 - 1
coldStartTasks/multi_modal/generate_text_from_video.py

@@ -76,7 +76,7 @@ class GenerateTextFromVideo(object):
     def upload_video_to_google_ai(self, max_processing_video_count=20):
         """
         上传视频到Google AI
-        max_processing_video_count: 处理中的最大视频数量,默认1000
+        max_processing_video_count: 处理中的最大视频数量,默认20
         video_content_understanding 表status字段
         0: 未处理
         1: 处理中
@@ -179,6 +179,7 @@ class GenerateTextFromVideo(object):
                         )
                         if os.path.exists(video_local_path):
                             os.remove(video_local_path)
+                        self.google_ai_api.delete_video(file_name)
                         tqdm.write("video process failed, delete local file")
                         continue
                 time.sleep(10)

+ 0 - 1
run_video_extract_text.py

@@ -4,7 +4,6 @@
 """
 
 from coldStartTasks.multi_modal import GenerateTextFromVideo
-from config import apolloConfig
 
 if __name__ == '__main__':
     generate_text_from_video = GenerateTextFromVideo()

+ 18 - 0
run_video_upload_to_google.py

@@ -0,0 +1,18 @@
+"""
+@author: luojunhui
+"""
+from coldStartTasks.multi_modal import GenerateTextFromVideo
+
+
+def upload_videos_to_google_task():
+    """
+    Upload local videos to Google cloud storage.
+    """
+    video_processing = GenerateTextFromVideo()
+    video_processing.connect_db()
+    video_processing.upload_video_to_google_ai()
+
+
+if __name__ == '__main__':  # run the upload task only when executed as a script
+    upload_videos_to_google_task()
+

+ 45 - 0
sh/run_video_summary_task.sh

@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Current date in YYYY-MM-DD format
+CURRENT_DATE=$(date +%F)
+
+# Log file path; the date makes it one file per day
+LOG_FILE="/root/luojunhui/logs/video_summary_log_$CURRENT_DATE.txt"
+
+# Append all output of this script (stdout and stderr) to the dated log file
+exec >> "$LOG_FILE" 2>&1
+if pgrep -f "python3 run_video_extract_text.py" > /dev/null
+then
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - run_video_extract_text.py is running"
+else
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart run_video_extract_text.py"
+    # Switch to the project directory; abort instead of launching from the wrong place
+    cd /root/luojunhui/LongArticlesJob || exit 1
+
+    # Activate the Conda environment
+    source /root/miniconda3/etc/profile.d/conda.sh
+    conda activate tasks
+
+    # Run the Python script in the background, appending its output to the log
+    nohup python3 run_video_extract_text.py >> "${LOG_FILE}" 2>&1 &
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_extract_text.py"
+fi
+
+# Re-apply the log redirection (already in effect from the exec above)
+exec >> "$LOG_FILE" 2>&1
+if pgrep -f "python3 run_video_upload_to_google.py" > /dev/null
+then
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - run_video_upload_to_google.py is running"
+else
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart run_video_upload_to_google.py"
+    # Switch to the project directory; abort instead of launching from the wrong place
+    cd /root/luojunhui/LongArticlesJob || exit 1
+
+    # Activate the Conda environment
+    source /root/miniconda3/etc/profile.d/conda.sh
+    conda activate tasks
+
+    # Run the Python script in the background, appending its output to the log
+    nohup python3 run_video_upload_to_google.py >> "${LOG_FILE}" 2>&1 &
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_upload_to_google.py"
+fi

+ 1 - 3
tasks/manage_google_storage_videos_task.py

@@ -29,9 +29,7 @@ def delete_finished_videos():
     video_processing.connect_db()
     storage_file_list = video_processing.google_ai_api.get_file_list()
     for file in storage_file_list:
-        print(file.name, file.expiration_time)
-        select_sql = f"""
-        """
+        video_processing.google_ai_api.delete_video(file.name)
 
 
 delete_finished_videos()