Przeglądaj źródła

migrateRootSourceId.py
每天 2:00 am 执行

罗俊辉 6 miesięcy temu
rodzic
commit
17fb5ea9d4
1 zmienionych plików z 24 dodań i 32 usunięć
  1. 24 32
      migrateRootSourceId.py

+ 24 - 32
migrateRootSourceId.py

@@ -9,7 +9,7 @@ import datetime
 import schedule
 from tqdm import tqdm
 
-from applications import Functions, PQMySQL, log
+from applications import Functions, longArticlesMySQL, PQMySQL, log
 
 
 class UpdateRootSourceId(object):
@@ -18,6 +18,7 @@ class UpdateRootSourceId(object):
     """
 
     db_client = PQMySQL()
+    lam = longArticlesMySQL()
     source_id_list = {
         'longArticles_2d311f88a9c1bd5a90ce88339ae93e78': 1,
         'longArticles_8d9fd0553c988e7a6bf3a6198f78d890': 1,
@@ -38,21 +39,21 @@ class UpdateRootSourceId(object):
     }
 
     @classmethod
-    def getDataList(cls, request_time_stamp):
+    def getDataList(cls, request_timestamp):
         """
 
-        :param request_time_stamp:
+        :param request_timestamp:
         :return:
         """
-        start_dt = request_time_stamp - 1 * 24 * 3600
+        start_dt = request_timestamp - 1 * 24 * 3600
         sql = f"""
-            select trace_id, gh_id, account_name, article_title, result1, result2, result3, request_time_stamp
-            from long_articles_video 
-            where request_time_stamp > {start_dt} 
-            and request_time_stamp < {request_time_stamp}
-            and content_status = 2;
+            select trace_id, gh_id, account_name, response, request_timestamp
+            from long_articles_match_videos
+            where request_timestamp >= {start_dt} 
+            and request_timestamp <= {request_timestamp}
+            and content_status = 4;
             """
-        result = cls.db_client.select(sql)
+        result = cls.lam.select(sql)
         log(
             task="migrateRootSourceId",
             function="getDataList",
@@ -70,21 +71,17 @@ class UpdateRootSourceId(object):
         trace_id = data_tuple[0]
         gh_id = data_tuple[1]
         account_name = data_tuple[2]
-        title = data_tuple[3]
-        result_1 = data_tuple[4]
-        result_2 = data_tuple[5]
-        result_3 = data_tuple[6]
-        request_time_stamp = data_tuple[7]
-        result_list = [result_1, result_2, result_3]
-        for result in result_list:
-            if result:
-                source_id = json.loads(result)['productionPath'].split("rootSourceId%3D")[1]
-                video_id = json.loads(result)['productionPath'].split("videos%3Fid%3D")[1].split("%26su%")[0]
+        request_timestamp = data_tuple[4]
+        response = json.loads(data_tuple[3])
+        if response:
+            for result in response:
+                source_id = result['rootSourceId']
+                video_id = result['videoId']
                 sql = f"""
                 INSERT INTO long_articles_root_source_id
-                (rootSourceId, accountName, ghId, articleTitle, requestTime, trace_id, push_type, video_id)
+                (rootSourceId, accountName, ghId, requestTime, trace_id, push_type, video_id)
                 values 
-                (%s, %s, %s, %s, %s, %s, %s, %s);
+                (%s, %s, %s, %s, %s, %s, %s);
                 """
                 try:
                     cls.db_client.update(
@@ -93,8 +90,7 @@ class UpdateRootSourceId(object):
                             source_id,
                             account_name,
                             gh_id,
-                            title,
-                            request_time_stamp,
+                            request_timestamp,
                             trace_id,
                             cls.source_id_list.get(source_id, 2),
                             video_id
@@ -114,8 +110,8 @@ class UpdateRootSourceId(object):
                         status="fail",
                         data={"trace_id": trace_id}
                     )
-            else:
-                print("No result")
+        else:
+            print("No result")
 
     @classmethod
     def sourceIdJob(cls):
@@ -143,12 +139,8 @@ def source_id_job():
 
 if __name__ == '__main__':
     # source_id_job()
-    schedule.every().day.at("01:00").do(Functions().job_with_thread, source_id_job)
+    schedule.every().day.at("02:00").do(Functions().job_with_thread, source_id_job)
     while True:
         schedule.run_pending()
         time.sleep(1)
-        # log(
-        #     task="migrateRootSourceId",
-        #     function="main",
-        #     message="迁移 source_id 任务正常执行"
-        # )
+