wangkun 2 years ago
parent
commit
11d9d0a222

BIN
.DS_Store


+ 50 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao1_author.py

@@ -54,30 +54,30 @@ class GongzhonghaoAuthor1:
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
 
-        # rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
-        # rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
-        # if rule_fans_cnt_max == 0:
-        #     rule_fans_cnt_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        # rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
-        # rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
-        # if rule_videos_cnt_max == 0:
-        #     rule_videos_cnt_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
         rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
         rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
         if rule_like_cnt_max == 0:
             rule_like_cnt_max = 100000000
 
-        # rule_width_min = rule_dict.get('width', {}).get('min', 0)
-        # rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
-        # if rule_width_max == 0:
-        #     rule_width_max = 100000000
-        #
-        # rule_height_min = rule_dict.get('height', {}).get('min', 0)
-        # rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
-        # if rule_height_max == 0:
-        #     rule_height_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
         rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
         rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
@@ -89,19 +89,39 @@ class GongzhonghaoAuthor1:
         if rule_comment_cnt_max == 0:
             rule_comment_cnt_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
                 and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                 and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
-                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min):
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min)\
+                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
             return True
         else:
             return False
@@ -127,6 +147,7 @@ class GongzhonghaoAuthor1:
         if len(configs) == 0:
             # Common.logger(log_type, crawler).warning(f"公众号_1未配置token")
             Feishu.bot(log_type, crawler, "公众号_1:未配置token")
+            time.sleep(60)
             return None
         token_dict = {
             "token_id": configs[0]["id"],
@@ -442,6 +463,10 @@ class GongzhonghaoAuthor1:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
             return
+        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+            shutil.rmtree(f"./{crawler}/videos/{md_title}")
+            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                title=video_dict["video_title"], url=video_dict["cover_url"])

+ 50 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao2_author.py

@@ -54,30 +54,30 @@ class GongzhonghaoAuthor2:
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
 
-        # rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
-        # rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
-        # if rule_fans_cnt_max == 0:
-        #     rule_fans_cnt_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        # rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
-        # rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
-        # if rule_videos_cnt_max == 0:
-        #     rule_videos_cnt_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
         rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
         rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
         if rule_like_cnt_max == 0:
             rule_like_cnt_max = 100000000
 
-        # rule_width_min = rule_dict.get('width', {}).get('min', 0)
-        # rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
-        # if rule_width_max == 0:
-        #     rule_width_max = 100000000
-        #
-        # rule_height_min = rule_dict.get('height', {}).get('min', 0)
-        # rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
-        # if rule_height_max == 0:
-        #     rule_height_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
         rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
         rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
@@ -89,19 +89,39 @@ class GongzhonghaoAuthor2:
         if rule_comment_cnt_max == 0:
             rule_comment_cnt_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
                 and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                 and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
-                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min):
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
+                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
             return True
         else:
             return False
@@ -127,6 +147,7 @@ class GongzhonghaoAuthor2:
         if len(configs) == 0:
             # Common.logger(log_type, crawler).warning(f"公众号_2未配置token")
             Feishu.bot(log_type, crawler, "公众号_2:未配置token")
+            time.sleep(60)
             return None
         token_dict = {
             "token_id": configs[0]["id"],
@@ -442,6 +463,10 @@ class GongzhonghaoAuthor2:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
             return
+        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+            shutil.rmtree(f"./{crawler}/videos/{md_title}")
+            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                title=video_dict["video_title"], url=video_dict["cover_url"])

+ 50 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao3_author.py

@@ -54,30 +54,30 @@ class GongzhonghaoAuthor3:
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
 
-        # rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
-        # rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
-        # if rule_fans_cnt_max == 0:
-        #     rule_fans_cnt_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        # rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
-        # rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
-        # if rule_videos_cnt_max == 0:
-        #     rule_videos_cnt_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
         rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
         rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
         if rule_like_cnt_max == 0:
             rule_like_cnt_max = 100000000
 
-        # rule_width_min = rule_dict.get('width', {}).get('min', 0)
-        # rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
-        # if rule_width_max == 0:
-        #     rule_width_max = 100000000
-        #
-        # rule_height_min = rule_dict.get('height', {}).get('min', 0)
-        # rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
-        # if rule_height_max == 0:
-        #     rule_height_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
         rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
         rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
@@ -89,19 +89,39 @@ class GongzhonghaoAuthor3:
         if rule_comment_cnt_max == 0:
             rule_comment_cnt_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
                 and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                 and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
-                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min):
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
+                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
             return True
         else:
             return False
@@ -127,6 +147,7 @@ class GongzhonghaoAuthor3:
         if len(configs) == 0:
             # Common.logger(log_type, crawler).warning(f"公众号_3未配置token")
             Feishu.bot(log_type, crawler, "公众号_3:未配置token")
+            time.sleep(60)
             return None
         token_dict = {
             "token_id": configs[0]["id"],
@@ -436,6 +457,10 @@ class GongzhonghaoAuthor3:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
             return
+        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+            shutil.rmtree(f"./{crawler}/videos/{md_title}")
+            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                title=video_dict["video_title"], url=video_dict["cover_url"])

+ 50 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao4_author.py

@@ -54,30 +54,30 @@ class GongzhonghaoAuthor4:
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
 
-        # rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
-        # rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
-        # if rule_fans_cnt_max == 0:
-        #     rule_fans_cnt_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        # rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
-        # rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
-        # if rule_videos_cnt_max == 0:
-        #     rule_videos_cnt_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
         rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
         rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
         if rule_like_cnt_max == 0:
             rule_like_cnt_max = 100000000
 
-        # rule_width_min = rule_dict.get('width', {}).get('min', 0)
-        # rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
-        # if rule_width_max == 0:
-        #     rule_width_max = 100000000
-        #
-        # rule_height_min = rule_dict.get('height', {}).get('min', 0)
-        # rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
-        # if rule_height_max == 0:
-        #     rule_height_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
         rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
         rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
@@ -89,19 +89,39 @@ class GongzhonghaoAuthor4:
         if rule_comment_cnt_max == 0:
             rule_comment_cnt_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
                 and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                 and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
-                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min):
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
+                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
             return True
         else:
             return False
@@ -127,6 +147,7 @@ class GongzhonghaoAuthor4:
         if len(configs) == 0:
             # Common.logger(log_type, crawler).warning(f"公众号_4未配置token")
             Feishu.bot(log_type, crawler, "公众号_4:未配置token")
+            time.sleep(60)
             return None
         token_dict = {
             "token_id": configs[0]["id"],
@@ -436,6 +457,10 @@ class GongzhonghaoAuthor4:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
             return
+        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+            shutil.rmtree(f"./{crawler}/videos/{md_title}")
+            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                title=video_dict["video_title"], url=video_dict["cover_url"])

+ 50 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao5_author.py

@@ -54,30 +54,30 @@ class GongzhonghaoAuthor5:
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
 
-        # rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
-        # rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
-        # if rule_fans_cnt_max == 0:
-        #     rule_fans_cnt_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        # rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
-        # rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
-        # if rule_videos_cnt_max == 0:
-        #     rule_videos_cnt_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
         rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
         rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
         if rule_like_cnt_max == 0:
             rule_like_cnt_max = 100000000
 
-        # rule_width_min = rule_dict.get('width', {}).get('min', 0)
-        # rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
-        # if rule_width_max == 0:
-        #     rule_width_max = 100000000
-        #
-        # rule_height_min = rule_dict.get('height', {}).get('min', 0)
-        # rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
-        # if rule_height_max == 0:
-        #     rule_height_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
         rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
         rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
@@ -89,19 +89,39 @@ class GongzhonghaoAuthor5:
         if rule_comment_cnt_max == 0:
             rule_comment_cnt_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
                 and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                 and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
-                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min):
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
+                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
             return True
         else:
             return False
@@ -127,6 +147,7 @@ class GongzhonghaoAuthor5:
         if len(configs) == 0:
             # Common.logger(log_type, crawler).warning(f"公众号_3未配置token")
             Feishu.bot(log_type, crawler, "公众号_5:未配置token")
+            time.sleep(60)
             return None
         token_dict = {
             "token_id": configs[0]["id"],
@@ -436,6 +457,10 @@ class GongzhonghaoAuthor5:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
             return
+        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+            shutil.rmtree(f"./{crawler}/videos/{md_title}")
+            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                title=video_dict["video_title"], url=video_dict["cover_url"])

+ 64 - 48
xiaoniangao/xiaoniangao_author/xiaoniangao_author_scheduling.py

@@ -34,10 +34,10 @@ class XiaoniangaoAuthorScheduling:
         :param rule_dict: 规则信息,字典格式
         :return: 满足规则,返回 True;反之,返回 False
         """
-        rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
-        rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
-        if rule_playCnt_max == 0:
-            rule_playCnt_max = 100000000
+        rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
+        rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
+        if rule_play_cnt_max == 0:
+            rule_play_cnt_max = 100000000
 
         rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
         rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
@@ -48,59 +48,75 @@ class XiaoniangaoAuthorScheduling:
         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
-        #
-        # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
-        # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
-        # if rule_fans_max == 0:
-        #     rule_fans_max = 100000000
-        #
-        # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
-        # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
-        # if rule_videos_max == 0:
-        #     rule_videos_max = 100000000
 
-        rule_like_min = rule_dict.get('like', {}).get('min', 0)
-        rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
-        if rule_like_max == 0:
-            rule_like_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
-        rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
-        if rule_videoWidth_max == 0:
-            rule_videoWidth_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
-        rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
-        rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
-        if rule_videoHeight_max == 0:
-            rule_videoHeight_max = 100000000
+        rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
+        rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
+        if rule_like_cnt_max == 0:
+            rule_like_cnt_max = 100000000
 
-        rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
-        rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
-        if rule_shareCnt_max == 0:
-            rule_shareCnt_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
 
-        rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
-        rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
-        if rule_commentCnt_max == 0:
-            rule_commentCnt_max = 100000000
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
 
-        Common.logger(log_type, crawler).info(f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
-        Common.logger(log_type, crawler).info(f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
-        Common.logger(log_type, crawler).info(f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
-        Common.logger(log_type, crawler).info(f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
-        Common.logger(log_type, crawler).info(f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
-        Common.logger(log_type, crawler).info(f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
-        Common.logger(log_type, crawler).info(f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
+        rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
+        rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
+        if rule_share_cnt_max == 0:
+            rule_share_cnt_max = 100000000
+
+        rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
+        rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
+        if rule_comment_cnt_max == 0:
+            rule_comment_cnt_max = 100000000
+
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
+
+        Common.logger(log_type, crawler).info(
+            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
-                and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
+                and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
                 and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
-                and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
-                and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
-                and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
-                and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
-                and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
+                and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
+                and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
             return True
         else:
             return False

+ 28 - 25
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -24,6 +24,14 @@ proxies = {"http": None, "https": None}
 class XiaoniangaoHour:
     platform = "小年糕"
 
+    words = "abcdefghijklmnopqrstuvwxyz0123456789"
+    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
+    token = "".join(random.sample(words, 32))
+    uid_token_dict = {
+        "uid": uid,
+        "token": token
+    }
+
     # 生成 uid、token
     @classmethod
     def get_uid_token(cls):
@@ -94,7 +102,7 @@ class XiaoniangaoHour:
     @classmethod
     def get_videoList(cls, log_type, crawler, env):
         # try:
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
         headers = {
             # "x-b3-traceid": cls.hour_x_b3_traceid,
@@ -353,18 +361,16 @@ class XiaoniangaoHour:
                     "{publish_time_str}",
                     {video_play_cnt},
                     {int(time.time())},
-                    "{time.strftime("%Y-%y-%d %H:%M:%S", time.localtime(int(time.time())))}"
+                    "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
                     )"""
                     Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                     MysqlHelper.update_values(log_type, crawler, insert_sql, env)
                     Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
 
     @classmethod
     def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
         # try:
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
         headers = {
             # "x-b3-traceid": cls.hour_x_b3_traceid,
@@ -462,9 +468,6 @@ class XiaoniangaoHour:
             }
             return video_info_dict
 
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error(f"download_video:{e}\n")
-
     # 更新小时榜数据
     @classmethod
     def update_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
@@ -528,8 +531,6 @@ class XiaoniangaoHour:
                                      env)
             else:
                 pass
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error(f"update_videoList:{e}\n")
 
     @classmethod
     def download(cls, log_type, crawler, video_info_dict, strategy, oss_endpoint, env):
@@ -633,47 +634,49 @@ class XiaoniangaoHour:
         if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
             Common.logger(log_type, crawler).info('视频已下载\n')
         # 播放量大于等于 30000,直接下载
-        elif int(video_info_dict["play_cnt"]) >= 50000:
+        elif int(video_info_dict["play_cnt"]) >= 30000:
             Common.logger(log_type, crawler).info(
-                f"播放量:{video_info_dict['play_cnt']} >= 50000,满足下载规则,开始下载视频")
+                f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
             cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
 
         # 上升榜判断逻辑,任意时间段上升量>=3000,任意两个时间段上升量>=1000
-        elif int(update_video_info['ten_play_cnt']) >= 5000 or int(
-                update_video_info['fifteen_play_cnt']) >= 5000 or int(update_video_info['twenty_play_cnt']) >= 5000:
+        elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
+                update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
             Common.logger(log_type, crawler).info(
-                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 5000")
+                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
 
-        elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['fifteen_play_cnt']) >= 2000:
+        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 2000")
+                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
 
-        elif int(update_video_info['fifteen_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
+        elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
+                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
 
-        elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
+        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
+                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
 
         else:
             Common.logger(log_type, crawler).info("上升量不满足下载规则")
-    # except Exception as e:
-    #     Common.logger(log_type, crawler).error(f"download_publish:{e}\n")
 
 
 if __name__ == "__main__":
-    print(XiaoniangaoHour.get_expression())
+    # print(XiaoniangaoHour.get_expression())
     # print(XiaoniangaoHour.get_uid_token())
     # XiaoniangaoHour.get_videoList("test", "xiaoniangao", "dev")
     # XiaoniangaoHour.update_videoList("test", "xiaoniangao", "小时榜爬虫策略", "out", "dev")
-
+    # befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
+    # update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
+    # print(update_time_stamp)
+    # print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))))
+    print(XiaoniangaoHour.uid_token_dict)
     pass

+ 79 - 64
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour_scheduling.py

@@ -21,6 +21,13 @@ proxies = {"http": None, "https": None}
 
 class XiaoniangaoHourScheduling:
     platform = "小年糕"
+    words = "abcdefghijklmnopqrstuvwxyz0123456789"
+    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
+    token = "".join(random.sample(words, 32))
+    uid_token_dict = {
+        "uid": uid,
+        "token": token
+    }
 
     # 生成 uid、token
     @classmethod
@@ -45,10 +52,10 @@ class XiaoniangaoHourScheduling:
         :param rule_dict: 规则信息,字典格式
         :return: 满足规则,返回 True;反之,返回 False
         """
-        rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
-        rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
-        if rule_playCnt_max == 0:
-            rule_playCnt_max = 100000000
+        rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
+        rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
+        if rule_play_cnt_max == 0:
+            rule_play_cnt_max = 100000000
 
         rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
         rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
@@ -59,67 +66,75 @@ class XiaoniangaoHourScheduling:
         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
-        #
-        # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
-        # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
-        # if rule_fans_max == 0:
-        #     rule_fans_max = 100000000
-        #
-        # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
-        # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
-        # if rule_videos_max == 0:
-        #     rule_videos_max = 100000000
-
-        rule_like_min = rule_dict.get('like', {}).get('min', 0)
-        rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
-        if rule_like_max == 0:
-            rule_like_max = 100000000
-
-        rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
-        rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
-        if rule_videoWidth_max == 0:
-            rule_videoWidth_max = 100000000
-
-        rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
-        rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
-        if rule_videoHeight_max == 0:
-            rule_videoHeight_max = 100000000
-
-        rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
-        rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
-        if rule_shareCnt_max == 0:
-            rule_shareCnt_max = 100000000
-
-        rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
-        rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
-        if rule_commentCnt_max == 0:
-            rule_commentCnt_max = 100000000
+
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
+
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
+
+        rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
+        rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
+        if rule_like_cnt_max == 0:
+            rule_like_cnt_max = 100000000
+
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
+
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
+
+        rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
+        rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
+        if rule_share_cnt_max == 0:
+            rule_share_cnt_max = 100000000
+
+        rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
+        rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
+        if rule_comment_cnt_max == 0:
+            rule_comment_cnt_max = 100000000
+
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59
 
         Common.logger(log_type, crawler).info(
             f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
         Common.logger(log_type, crawler).info(
             f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
-                and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
+                and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
                 and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
-                and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
-                and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
-                and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
-                and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
-                and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
+                and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
+                and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
             return True
         else:
             return False
@@ -139,7 +154,7 @@ class XiaoniangaoHourScheduling:
     # 获取列表
     @classmethod
     def get_videoList(cls, log_type, crawler, rule_dict, env):
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
         headers = {
             "x-b3-traceid": '1c403a4aa72e3c',
@@ -334,7 +349,7 @@ class XiaoniangaoHourScheduling:
 
     @classmethod
     def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
         headers = {
             "x-b3-traceid": '1c403a4aa72e3c',
@@ -609,9 +624,9 @@ class XiaoniangaoHourScheduling:
         if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
             Common.logger(log_type, crawler).info('视频已下载\n')
         # Play count >= 30000: download directly
-        elif int(video_info_dict["play_cnt"]) >= 50000:
+        elif int(video_info_dict["play_cnt"]) >= 30000:
             Common.logger(log_type, crawler).info(
-                f"播放量:{video_info_dict['play_cnt']} >= 50000,满足下载规则,开始下载视频")
+                f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -621,10 +636,10 @@ class XiaoniangaoHourScheduling:
                          env=env)
 
         # Rising-list rule: download if any single time slot rose by >= 3000, or any two of the three slots each rose by >= 1000
-        elif int(update_video_info['ten_play_cnt']) >= 5000 or int(
-                update_video_info['fifteen_play_cnt']) >= 5000 or int(update_video_info['twenty_play_cnt']) >= 5000:
+        elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
+                update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
             Common.logger(log_type, crawler).info(
-                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 5000")
+                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
@@ -634,9 +649,9 @@ class XiaoniangaoHourScheduling:
                          oss_endpoint=oss_endpoint,
                          env=env)
 
-        elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['fifteen_play_cnt']) >= 2000:
+        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 2000")
+                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
@@ -646,9 +661,9 @@ class XiaoniangaoHourScheduling:
                          oss_endpoint=oss_endpoint,
                          env=env)
 
-        elif int(update_video_info['fifteen_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
+        elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
+                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
@@ -658,9 +673,9 @@ class XiaoniangaoHourScheduling:
                          oss_endpoint=oss_endpoint,
                          env=env)
 
-        elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
+        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
-                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
+                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,

+ 10 - 16
xiaoniangao/xiaoniangao_play/xiaoniangao_play.py

@@ -23,6 +23,14 @@ proxies = {"http": None, "https": None}
 class XiaoniangaoPlay:
     platform = "小年糕"
 
+    words = "abcdefghijklmnopqrstuvwxyz0123456789"
+    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
+    token = "".join(random.sample(words, 32))
+    uid_token_dict = {
+        "uid": uid,
+        "token": token
+    }
+
     # 生成 uid、token
     @classmethod
     def get_uid_token(cls):
@@ -48,7 +56,7 @@ class XiaoniangaoPlay:
             # 宽或高
             if int(video_dict['video_width']) >= 0 or int(video_dict['video_height']) >= 0:
                 # 播放量
-                if int(video_dict['play_cnt']) >= 80000:
+                if int(video_dict['play_cnt']) >= 20000:
                     # 点赞量
                     if int(video_dict['like_cnt']) >= 0:
                         # 分享量
@@ -79,21 +87,17 @@ class XiaoniangaoPlay:
     # 获取列表
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
         headers = {
-            # "x-b3-traceid": cls.play_x_b3_traceid,
             "x-b3-traceid": '1dc0a6d0929a2b',
-            # "X-Token-Id": cls.play_x_token_id,
             "X-Token-Id": 'ae99a4953804085ebb0ae36fa138031d-1146052582',
-            # "uid": cls.play_uid,
             "uid": uid_token_dict['uid'],
             "content-type": "application/json",
             "Accept-Encoding": "gzip,compress,br,deflate",
             "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
                           ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
                           'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-            # "Referer": cls.play_referer
             "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/620/page-frame.html'
         }
         data = {
@@ -141,15 +145,12 @@ class XiaoniangaoPlay:
                 "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
             },
             "refresh": False,
-            # "token": cls.play_token,
             "token": uid_token_dict['token'],
-            # "uid": cls.play_uid,
             "uid": uid_token_dict['uid'],
             "proj": "ma",
             "wx_ver": "8.0.20",
             "code_ver": "3.62.0"
         }
-        # try:
         urllib3.disable_warnings()
         r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
         if "data" not in r.text or r.status_code != 200:
@@ -306,9 +307,6 @@ class XiaoniangaoPlay:
                                      oss_endpoint=oss_endpoint,
                                      env=env)
 
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error("get_play_feeds异常:{}", e)
-
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
         sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
@@ -317,7 +315,6 @@ class XiaoniangaoPlay:
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, strategy, oss_endpoint, env):
-        # try:
         # 过滤无效视频
         if video_dict["video_id"] == 0 \
                 or video_dict["video_url"] == 0\
@@ -424,9 +421,6 @@ class XiaoniangaoPlay:
             Feishu.update_values(log_type, crawler, "c85k1C", "F2:Z2", values)
             Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
 
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error('download_publish异常:{}', e)
-
 
 if __name__ == '__main__':
     XiaoniangaoPlay.get_videoList("play", "xiaoniangao", "播放量榜爬虫策略", "out", "dev")

+ 62 - 48
xiaoniangao/xiaoniangao_play/xiaoniangao_play_scheduling.py

@@ -20,6 +20,13 @@ proxies = {"http": None, "https": None}
 
 class XiaoniangaoPlayScheduling:
     platform = "小年糕"
+    words = "abcdefghijklmnopqrstuvwxyz0123456789"
+    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
+    token = "".join(random.sample(words, 32))
+    uid_token_dict = {
+        "uid": uid,
+        "token": token
+    }
 
     # 生成 uid、token
     @classmethod
@@ -44,10 +51,10 @@ class XiaoniangaoPlayScheduling:
         :param rule_dict: 规则信息,字典格式
         :return: 满足规则,返回 True;反之,返回 False
         """
-        rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
-        rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
-        if rule_playCnt_max == 0:
-            rule_playCnt_max = 100000000
+        rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
+        rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
+        if rule_play_cnt_max == 0:
+            rule_play_cnt_max = 100000000
 
         rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
         rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
@@ -58,67 +65,75 @@ class XiaoniangaoPlayScheduling:
         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
-        #
-        # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
-        # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
-        # if rule_fans_max == 0:
-        #     rule_fans_max = 100000000
-        #
-        # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
-        # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
-        # if rule_videos_max == 0:
-        #     rule_videos_max = 100000000
 
-        rule_like_min = rule_dict.get('like', {}).get('min', 0)
-        rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
-        if rule_like_max == 0:
-            rule_like_max = 100000000
+        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
+        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
+        if rule_fans_cnt_max == 0:
+            rule_fans_cnt_max = 100000000
 
-        rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
-        rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
-        if rule_videoWidth_max == 0:
-            rule_videoWidth_max = 100000000
+        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
+        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
+        if rule_videos_cnt_max == 0:
+            rule_videos_cnt_max = 100000000
 
-        rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
-        rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
-        if rule_videoHeight_max == 0:
-            rule_videoHeight_max = 100000000
+        rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
+        rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
+        if rule_like_cnt_max == 0:
+            rule_like_cnt_max = 100000000
 
-        rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
-        rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
-        if rule_shareCnt_max == 0:
-            rule_shareCnt_max = 100000000
+        rule_width_min = rule_dict.get('width', {}).get('min', 0)
+        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
+        if rule_width_max == 0:
+            rule_width_max = 100000000
 
-        rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
-        rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
-        if rule_commentCnt_max == 0:
-            rule_commentCnt_max = 100000000
+        rule_height_min = rule_dict.get('height', {}).get('min', 0)
+        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
+        if rule_height_max == 0:
+            rule_height_max = 100000000
+
+        rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
+        rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
+        if rule_share_cnt_max == 0:
+            rule_share_cnt_max = 100000000
+
+        rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
+        rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
+        if rule_comment_cnt_max == 0:
+            rule_comment_cnt_max = 100000000
+
+        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
+        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
+        if rule_publish_time_max == 0:
+            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59 UTC+8, as a millisecond timestamp
 
         Common.logger(log_type, crawler).info(
             f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
+            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
         Common.logger(log_type, crawler).info(
             f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
+            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
+        Common.logger(log_type, crawler).info(
+            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
+            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
+            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
+            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
-                and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
+                and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
                 and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
-                and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
-                and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
-                and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
-                and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
-                and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
+                and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
+                and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
+                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
+                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
+                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min)\
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
             return True
         else:
             return False
@@ -126,7 +141,7 @@ class XiaoniangaoPlayScheduling:
     # 获取列表
     @classmethod
     def get_videoList(cls, log_type, crawler, rule_dict, strategy, oss_endpoint, env):
-        uid_token_dict = cls.get_uid_token()
+        uid_token_dict = cls.uid_token_dict
         url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
         headers = {
             "x-b3-traceid": '1dc0a6d0929a2b',
@@ -393,6 +408,5 @@ class XiaoniangaoPlayScheduling:
 
 
 if __name__ == '__main__':
-    XiaoniangaoPlayScheduling.get_videoList("play", "xiaoniangao", "播放量榜爬虫策略", "out", "dev")
 
     pass