Browse Source

v0.4提交

罗俊辉 9 months ago
parent
commit
26bf7879f8
2 changed files with 116 additions and 15 deletions
  1. 37 15
      tasks/auto_getoff_videos.py
  2. 79 0
      tasks/migrate_vids.py

+ 37 - 15
tasks/auto_getoff_videos.py

@@ -33,19 +33,38 @@ class AutoGetOffVideos(object):
             charset="utf8mb4"
         )
         select_sql = f"""
-        SELECT recall_video_id1, recall_video_id2, recall_video_id3 
-        FROM long_articles_video 
-        WHERE content_status = %s and request_time_stamp < %s;
+        SELECT video_id
+        FROM article_match_videos
+        WHERE video_status = 1 and request_time < %s;
         """
         cursor = spider_connection.cursor()
-        cursor.execute(select_sql, (2, time_stamp))
+        cursor.execute(select_sql, time_stamp)
         data = cursor.fetchall()
-        vid_set = set()
-        for item in data:
-            for vid in item:
-                if vid:
-                    vid_set.add(vid)
-        return tuple(vid_set)
+        return data
+
+    @classmethod
+    def update_mysql_status(cls, video_id):
+        """
+        修改数据库内视频状态
+        :param video_id:
+        :return:
+        """
+        spider_connection = pymysql.connect(
+            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
+            port=3306,
+            user="crawler",
+            passwd="crawler123456@",
+            db="piaoquan-crawler",
+            charset="utf8mb4"
+        )
+        select_sql = f"""
+                UPDATE article_match_videos
+                SET video_status = 0
+                WHERE video_id = %s;
+                """
+        cursor = spider_connection.cursor()
+        cursor.execute(select_sql, video_id)
+        spider_connection.commit()
 
     @classmethod
     def check_video_by_id(cls, video_id):
@@ -70,12 +89,14 @@ class AutoGetOffVideos(object):
         修改视频规则
         :return:
         """
+        cls.update_mysql_status(video_id=video_id)
         path = "./static/{}.json".format(video_id)
         if os.path.exists(path):
             print("File already Exists")
         else:
             url = "https://admin.piaoquantv.com/manager/video/audit/v2/updateAuditStatus"
-            payload = "videoId={}&auditStatus=2&updateReasonJson=&rejectReasonJson=%5B%7B%22reason%22%3A%22%E9%95%BF%E6%96%87%E8%87%AA%E5%8A%A8%E4%B8%8B%E6%9E%B6%22%2C%22reasonId%22%3A-1%7D%5D&adminUid=206".format(video_id)
+            payload = "videoId={}&auditStatus=2&updateReasonJson=&rejectReasonJson=%5B%7B%22reason%22%3A%22%E9%95%BF%E6%96%87%E8%87%AA%E5%8A%A8%E4%B8%8B%E6%9E%B6%22%2C%22reasonId%22%3A-1%7D%5D&adminUid=206".format(
+                video_id)
             headers = {
                 'accept': 'application/json',
                 'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
@@ -97,6 +118,7 @@ class AutoGetOffVideos(object):
                 headers=headers,
                 data=payload
             )
+            # print(response.json())
             if response.json()['code'] == 0:
                 with open(path, "w") as f:
                     f.write(json.dumps({"time": int(time.time())}))
@@ -112,8 +134,9 @@ class AutoGetOffVideos(object):
         now_time_stamp = int(time.time())
         three_days_before = now_time_stamp - 4 * 24 * 3600
         vid_set = cls.get_long_articles_video_set(time_stamp=three_days_before)
+        vid_list = [i[0] for i in vid_set]
         with ThreadPoolExecutor(max_workers=8) as Pool:
-            Pool.map(cls.check_video_by_id, list(vid_set))
+            Pool.map(cls.check_video_by_id, vid_list)
 
     @classmethod
     def task1(cls):
@@ -124,7 +147,6 @@ class AutoGetOffVideos(object):
         now_stamp = int(time.time())
         seven_days_before = now_stamp - 8 * 24 * 3600
         video_set = cls.get_long_articles_video_set(time_stamp=seven_days_before)
+        vid_list = [i[0] for i in video_set]
         with ThreadPoolExecutor(max_workers=8) as Pool1:
-            Pool1.map(cls.change_status, list(video_set))
-
-
+            Pool1.map(cls.change_status, vid_list)

+ 79 - 0
tasks/migrate_vids.py

@@ -0,0 +1,79 @@
+"""
+@author: luojunhui
+"""
+import pymysql
+from concurrent.futures.thread import ThreadPoolExecutor
+
+
def get_data_list():
    """
    Fetch all matched-video rows pending migration.

    Selects (trace_id, content_id, recall_video_id1..3) from
    long_articles_video where content_status = 2.

    :return: tuple of 5-tuples as returned by cursor.fetchall()
    """
    # NOTE(review): credentials are hard-coded; move to config/env vars.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
        port=3306,
        user="crawler",
        passwd="crawler123456@",
        db="piaoquan-crawler",
        charset="utf8mb4"
    )
    sql = "select trace_id, content_id, recall_video_id1, recall_video_id2, recall_video_id3 from long_articles_video where content_status = 2;"
    try:
        cursor = connection.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
    finally:
        # Original never closed the connection; release it after reading.
        connection.close()
    return data
+
+
def mig(single_tuple):
    """
    Migrate one long_articles_video row into article_match_videos.

    Inserts one article_match_videos row per non-empty recall video id.

    :param single_tuple: (trace_id, content_id, vid1, vid2, vid3) row
    :return: None
    """
    trace_id, content_id, vid1, vid2, vid3 = single_tuple
    # trace_id is assumed to end with "-<request_timestamp>" — TODO confirm.
    request_time = trace_id.split("-")[-1]
    # "lehuo" contents are migrated as already offline (status 0).
    video_status = 0 if "lehuo" in content_id else 1
    # Hoisted out of the loop: the SQL text is loop-invariant, and the
    # original opened a brand-new connection per video id and never
    # closed any of them.
    insert_sql = """
    INSERT INTO article_match_videos
    (video_id, trace_id, content_id, request_time, video_status)
    values 
    (%s, %s, %s, %s, %s);
    """
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
        port=3306,
        user="crawler",
        passwd="crawler123456@",
        db="piaoquan-crawler",
        charset="utf8mb4"
    )
    try:
        cursor = connection.cursor()
        for video_id in (vid1, vid2, vid3):
            # Skip empty/None recall slots.
            if video_id:
                cursor.execute(
                    insert_sql,
                    (video_id, trace_id, content_id, request_time, video_status)
                )
        connection.commit()
    finally:
        connection.close()
+
+
def _run_migration():
    """Fetch all pending rows and migrate them with a 20-thread pool."""
    tuple_list = get_data_list()
    # Progress hint: how many rows will be migrated.
    print(len(tuple_list))
    with ThreadPoolExecutor(max_workers=20) as pool:
        pool.map(mig, tuple_list)


# Guard the entry point so importing this module does not hit the DB.
if __name__ == "__main__":
    _run_migration()