Browse Source

增加祝万物复苏——推荐流不过

罗俊辉 1 year ago
parent
commit
8249c4f76a
2 changed files with 27 additions and 22 deletions
  1. +2 -1
      application/pipeline/pipeline.py
  2. +25 -21
      spider/crawler_online/zhuwanwufusu_2.py

+ 2 - 1
application/pipeline/pipeline.py

@@ -126,7 +126,8 @@ class PiaoQuanPipeline(object):
         视频是否重复
         :return:
         """
-        # sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
+        if self.platform == "zhuwanwufusunew" and self.mode == "recommend":
+            return True
         out_id = self.item["out_video_id"]
         sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}"; """
         repeat_video = self.mysql.select(sql=sql)

+ 25 - 21
spider/crawler_online/zhuwanwufusu_2.py

@@ -126,6 +126,8 @@ class ZhuWanWuFuSuRecommend(object):
                 page_index = result["list"]["current"] + 1
                 for index, video_obj in enumerate(result["list"]["records"], 1):
                     try:
+                        # c += 1
+                        # print(c)
                         self.aliyun_log.logging(
                             code="1001", message="扫描到一条视频", data=video_obj
                         )
@@ -142,6 +144,8 @@ class ZhuWanWuFuSuRecommend(object):
                 self.aliyun_log.logging(
                     code="3000", message="抓取第{}页的时候失败, 报错原因是{}".format(page_index, e)
                 )
+            if self.limit_flag:
+                return
             time.sleep(random.randint(5, 10))
 
     def get_user_videos(self, user_id):
@@ -360,27 +364,27 @@ class ZhuWanWuFuSuRecommend(object):
         先抓取推荐列表的视频, 等待 2 分钟后抓取 detail 页面,等待 5 分钟后,抓取账号视频
         """
         self.get_recommend_list()
-        if self.limit_flag:
-            return
-        time.sleep(2 * 60)
-        self.get_detail_video_list()
-        if self.limit_flag:
-            return
-        time.sleep(5 * 60)
-        self.mode = "author"
-        user_list = self.get_user_list()
-        if user_list:
-            for index, user_id in enumerate(user_list):
-                try:
-                    if self.limit_flag:
-                        self.aliyun_log.logging(code="2000", message="本轮已经抓取足够数量的视频")
-                        return
-                    self.get_user_videos(user_id=user_id)
-                except Exception as e:
-                    self.aliyun_log.logging(
-                        code="3000",
-                        message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e),
-                    )
+        # if self.limit_flag:
+        #     return
+        # time.sleep(2 * 60)
+        # self.get_detail_video_list()
+        # if self.limit_flag:
+        #     return
+        # time.sleep(5 * 60)
+        # self.mode = "author"
+        # user_list = self.get_user_list()
+        # if user_list:
+        #     for index, user_id in enumerate(user_list):
+        #         try:
+        #             if self.limit_flag:
+        #                 self.aliyun_log.logging(code="2000", message="本轮已经抓取足够数量的视频")
+        #                 return
+        #             self.get_user_videos(user_id=user_id)
+        #         except Exception as e:
+        #             self.aliyun_log.logging(
+        #                 code="3000",
+        #                 message="抓取账号视频出现异常,账号 id 是{}, 报错原因是{}".format(user_id, e),
+        #             )
 
 
 # if __name__ == '__main__':