Parcourir la source

第二版本汤姆森 更新任务执行方式

zhangbo il y a 10 mois
Parent
commit
8f46b6ed5d

+ 46 - 12
write_redis/alg_ad_feature_03_cid2actionv1_redis.py

@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
+from multiprocessing import Process, cpu_count
+
 root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 if root_dir not in sys.path:
     sys.path.append(root_dir)
@@ -17,6 +19,7 @@ import json
 from datetime import datetime
 from queue import Queue
 from tqdm import tqdm
+import time
 
 
 
@@ -149,27 +152,56 @@ def main():
         mm = "00"
         log_.info("没有读取到参数,采用系统时间:{}".format(e))
     log_.info("使用时间参数-日期:{},小时:{}".format(date, str(hour)))
-    if hour in ["00", "01"]:
+    if hour in ["00", "01", "02", "03", "04", "05", "06"]:
         log_.info(f"hour={hour}不执行,直接返回。")
         return
     # 1 判断上游数据表是否生产完成
     project = "loghubods"
     table = "alg_ad_feature_cid_action_v1"
     table_data_cnt = check_data(project, table, date, hour, mm)
-    if table_data_cnt == 0:
-        log_.info("上游数据{}未就绪{}/{},等待...".format(table, date, hour))
-        Timer(60, main).start()
-    else:
-        log_.info("上游数据就绪,count={},开始读取数据表".format(table_data_cnt))
-        # 2 读取数据表 处理特征
-        video_list = get_sql(project, table, date, hour, mm)
-        # 3 写入redis
-        log_.info("video的数据量:{}".format(len(video_list)))
-        records_process_for_list(video_list, process_and_store, max_size=50, num_workers=8)
+    run_flag = table_data_cnt == 0
+    begin_ts = int(time.time())
+    while run_flag:
+        if int(time.time()) - begin_ts >= 60*50:
+            log_.info("等待上游数据超过50分钟了,认为失败退出:{}".format(int(time.time()) - begin_ts))
+            exit(999)
+        table_data_cnt = check_data(project, table, date, hour, mm)
+        if table_data_cnt == 0:
+            log_.info("上游数据{}未就绪{}/{},等待...".format(table, date, hour))
+            log_.info("等待2分钟")
+            time.sleep(60*2)
+        else:
+            run_flag = False
+
+    log_.info("上游数据就绪,count={},开始读取数据表".format(table_data_cnt))
+    # 2 读取数据表 处理特征
+    video_list = get_sql(project, table, date, hour, mm)
+    # 3 写入redis
+    log_.info("video的数据量:{}".format(len(video_list)))
+    records_process_for_list(video_list, process_and_store, max_size=50, num_workers=8)
+
+    # if table_data_cnt == 0:
+    #     log_.info("上游数据{}未就绪{}/{},等待...".format(table, date, hour))
+    #     Timer(60, main).start()
+    # else:
+    #     log_.info("上游数据就绪,count={},开始读取数据表".format(table_data_cnt))
+    #     # 2 读取数据表 处理特征
+    #     video_list = get_sql(project, table, date, hour, mm)
+    #     # 3 写入redis
+    #     log_.info("video的数据量:{}".format(len(video_list)))
+    #     records_process_for_list(video_list, process_and_store, max_size=50, num_workers=8)
 
 if __name__ == '__main__':
     log_.info("开始执行:" + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
-    main()
+    # main()
+    process = Process(target=main)
+    process.start()
+    # 等待子进程完成或超时
+    process.join(timeout=3600)  # 设置超时为3600秒(1小时)
+    if process.is_alive():
+        print("脚本执行时间超过1小时,执行失败。")
+        process.terminate()  # 终止子进程
+        exit(999)  # 直接退出主进程并返回状态码999
     log_.info("完成执行:" + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
 
 
@@ -185,4 +217,6 @@ if __name__ == '__main__':
             sql 各种字段
             record 各种字段
             if hour in ["00"]: 哪些小时不执行
+            process.join(timeout=3600) 任务超时时间3600
+            int(time.time()) - begin_ts >= 60*50 任务超时时间3000
 """

+ 1 - 1
write_redis/alg_ad_feature_04_vidcid2actionv1_redis.py

@@ -177,7 +177,7 @@ if __name__ == '__main__':
 
 
 # cd /root/zhangbo/rov-offline
-# python alg_ad_feature_02_vidcid2action_redis.py 20240523 19 00
+# python alg_ad_feature_04_vidcid2actionv1_redis.py 20240530 20
 
 """
     !!!!!!!!!!!!!!