Parcourir la source

Merge branch '2025-03-18-distribute-audit-videos' of luojunhui/LongArticlesJob into master

luojunhui il y a 7 mois
Parent
commit
471cc98cac

+ 3 - 0
applications/const/__init__.py

@@ -198,6 +198,9 @@ class WeixinVideoCrawlerConst:
     # 每天发送的审核视频数量
     MAX_VIDEO_NUM = 1000
 
+    # 单次发布视频审核量
+    MAX_VIDEO_NUM_PER_PUBLISH = 350
+
     # 标题状态
     TITLE_DEFAULT_STATUS = 0
     TITLE_EXIT_STATUS = 1

+ 3 - 1
coldStartTasks/publish/publish_video_to_pq_for_audit.py

@@ -36,12 +36,14 @@ class PublishVideosForAudit(object):
         """
         already_published_count = self.get_published_articles_today()
         rest_count = const.MAX_VIDEO_NUM - already_published_count
+
+        limit_count = min(rest_count, const.MAX_VIDEO_NUM_PER_PUBLISH)
         sql = f"""
             SELECT id, article_title, video_oss_path 
             FROM publish_single_video_source 
             WHERE audit_status = {const.VIDEO_AUDIT_INIT_STATUS} and bad_status = {const.TITLE_DEFAULT_STATUS}
             ORDER BY score DESC
-            LIMIT {rest_count};
+            LIMIT {limit_count};
             """
         response = self.db_client.fetch(sql, cursor_type=DictCursor)
         return response

+ 26 - 0
sh/run_gzh_video_crawler.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# 获取当前日期,格式为 YYYY-MM-DD
+CURRENT_DATE=$(date +%F)
+
+# 日志文件路径,包含日期
+LOG_FILE="/root/luojunhui/logs/gzh_video_crawler_log_$CURRENT_DATE.txt"
+
+# 重定向整个脚本的输出到带日期的日志文件
+exec >> "$LOG_FILE" 2>&1
+if pgrep -f "python3 run_video_account_crawler.py" > /dev/null
+then
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - run_video_account_crawler.py is running"
+else
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart run_video_account_crawler.py"
+    # 切换到指定目录
+    cd /root/luojunhui/LongArticlesJob
+
+    # 激活 Conda 环境
+    source /root/miniconda3/etc/profile.d/conda.sh
+    conda activate tasks
+
+    # 在后台运行 Python 脚本并重定向日志输出
+    nohup python3 run_video_account_crawler.py >> "${LOG_FILE}" 2>&1 &
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_account_crawler.py"
+fi

+ 0 - 2
sh/run_video_publish_and_audit.sh

@@ -21,8 +21,6 @@ else
     conda activate tasks
 
     # 在后台运行 Python 脚本并重定向日志输出
-    nohup python3 run_video_account_crawler.py >> "${LOG_FILE}" 2>&1 &
-    sleep 180
     nohup python3 run_video_publish_and_audit.py >> "${LOG_FILE}" 2>&1 &
     echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted run_video_publish_and_audit.py"
 fi