
Douyin / Kuaishou: per-tier rule filtering

zhangyong 1 year ago
parent
current commit
12e79e190d

+ 29 - 22
douyin/douyin_author/douyin_author_scheduling_new.py

@@ -55,11 +55,33 @@ class DouyinauthorScheduling:
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         next_cursor = 0
         while True:
+            flag = user_dict["link"].split("_")[0]
+            if flag == "V1":
+                rule_dict = {
+                    "play_cnt": {"min": 100000, "max": 0},
+                    'period': {"min": 15, "max": 15},
+                    'special': 0.03
+                }
+            elif flag == "V2":
+                rule_dict = {
+                    "play_cnt": {"min": 80000, "max": 0},
+                    'period': {"min": 7, "max": 7},
+                    'special': 0.02
+                }
+            elif flag == "V3":
+                rule_dict = {
+                    "play_cnt": {"min": 10000, "max": 0},
+                    'period': {"min": 3, "max": 3},
+                    'special': 0.01
+                }
             cookie = cls.get_cookie(log_type, crawler, env)["cookie"]
-
+            if user_dict['link'][0] == "V":
+                link = user_dict["link"][3:]
+            else:
+                link = user_dict["link"]
             time.sleep(random.randint(5, 10))
             url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
-            account_id = user_dict["link"]
+            account_id = link
             headers = {
                 'Accept': 'application/json, text/plain, */*',
                 'Accept-Language': 'zh-CN,zh;q=0.9',
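
For reference, the branches added above map the prefix of user_dict["link"] to a filtering rule and then strip that prefix before the link is used as the account id. A minimal standalone sketch of that logic (the helper names and the fallback tier are assumptions made for this sketch; the patch itself has no else branch, so rule_dict stays unset for links without a "V1"/"V2"/"V3" prefix):

# Sketch of the tier mapping added in this commit; helper names and the
# default fallback are assumptions, not part of the patch.
TIER_RULES = {
    "V1": {"play_cnt": {"min": 100000, "max": 0}, "period": {"min": 15, "max": 15}, "special": 0.03},
    "V2": {"play_cnt": {"min": 80000, "max": 0}, "period": {"min": 7, "max": 7}, "special": 0.02},
    "V3": {"play_cnt": {"min": 10000, "max": 0}, "period": {"min": 3, "max": 3}, "special": 0.01},
}

def get_rule_dict(link):
    flag = link.split("_")[0]                      # e.g. "V1_<account_id>" -> "V1"
    return TIER_RULES.get(flag, TIER_RULES["V3"])  # assumed default; the patch has no else branch

def strip_tier_prefix(link):
    # "V1_<account_id>" -> "<account_id>"; links without a tier prefix pass through
    return link[3:] if link.startswith("V") else link
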
@@ -144,33 +166,18 @@ class DouyinauthorScheduling:
                         comment_count = int(data[i].get('statistics').get('comment_count'))  # 评论
                         # collect_count = data[i].get('statistics').get('collect_count')  # 收藏
                         share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                        date_three_days_ago_string = (date.today() + timedelta(days=-5)).strftime("%Y-%m-%d %H:%M:%S")
-                        rule = publish_time_str > date_three_days_ago_string
-                        if i > 2:
-                            if rule == False:
-                                break
-                        if rule == False:
-                            Common.logger(log_type, crawler).info(f"发布时间小于5天,发布时间:{publish_time_str}\n")
+                        video_percent = '%.2f' % (share_count / digg_count)
+                        special = float(rule_dict.get("special"))
+                        if float(video_percent) < special:
+                            Common.logger(log_type, crawler).info(f"不符合条件:分享/点赞-{video_percent}\n")
                             AliyunLogger.logging(
                                 code="2004",
                                 platform=crawler,
                                 mode=log_type,
                                 env=env,
-                                message=f"发布时间小于5天,发布时间:{publish_time_str}\n"
+                                message=f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n"
                             )
                             continue
-                        video_percent = '%.2f' % (share_count / digg_count)
-                        if digg_count < 50000 and digg_count < 50:
-                            if float(video_percent) < 0.01:
-                                Common.logger(log_type, crawler).info(f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n")
-                                AliyunLogger.logging(
-                                    code="2004",
-                                    platform=crawler,
-                                    mode=log_type,
-                                    env=env,
-                                    message=f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n"
-                                )
-                                continue
                         video_dict = {'video_title': video_title,
                                       'video_id': video_id,
                                       'play_cnt': 0,
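
The hunk above replaces the old 5-day publish-time cutoff with a share-to-like ratio check against the tier's special threshold. A hedged sketch of that condition on its own (the zero-like guard is added here for safety; the patch divides by digg_count directly):

def passes_share_like_ratio(share_count, digg_count, rule_dict):
    # Keep the video only if share/like reaches the tier's "special" threshold,
    # mirroring: video_percent = '%.2f' % (share_count / digg_count)
    special = float(rule_dict.get("special", 0.01))
    if digg_count <= 0:
        return False  # guard added for this sketch; not present in the patch
    return float('%.2f' % (share_count / digg_count)) >= special

With the V2 rule (special = 0.02), for example, 1,600 shares on 100,000 likes (ratio 0.02) passes, while 1,000 shares (ratio 0.01) is skipped and logged under code 2004.
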

+ 32 - 24
kuaishou/kuaishou_author/kuaishou_author_scheduling_new.py

@@ -81,12 +81,35 @@ class KuaishouauthorScheduling:
         pcursor = ""
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         while True:
+            flag = user_dict["link"].split("_")[0]
+            if flag == "V1":
+                rule_dict = {
+                    "play_cnt": {"min": 100000, "max": 0},
+                    'period': {"min": 15, "max": 15},
+                    'special': 0.03
+                }
+            elif flag == "V2":
+                rule_dict = {
+                    "play_cnt": {"min": 80000, "max": 0},
+                    'period': {"min": 7, "max": 7},
+                    'special': 0.02
+                }
+            elif flag == "V3":
+                rule_dict = {
+                    "play_cnt": {"min": 10000, "max": 0},
+                    'period': {"min": 3, "max": 3},
+                    'special': 0.01
+                }
             time.sleep(random.randint(10, 50))
             url = "https://www.kuaishou.com/graphql"
+            if user_dict['link'][0] == "V":
+                link = user_dict["link"][3:]
+            else:
+                link = user_dict["link"]
             payload = json.dumps({
                 "operationName": "visionProfilePhotoList",
                 "variables": {
-                    "userId": user_dict["link"].replace("https://www.kuaishou.com/profile/", ""),
+                    "userId": str(link.replace("https://www.kuaishou.com/profile/", "")),
                     "pcursor": pcursor,
                     "page": "profile"
                 },
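
The payload change above funnels the possible link formats into a single GraphQL userId. A small sketch of that normalization (the function name is an assumption for this sketch):

def to_kuaishou_user_id(link):
    # Accepts "V2_3xabc...", "V2_https://www.kuaishou.com/profile/3xabc...",
    # a bare profile URL, or a bare user id.
    if link.startswith("V"):
        link = link[3:]  # drop the "Vn_" tier prefix, as the patch does
    return link.replace("https://www.kuaishou.com/profile/", "")
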
@@ -102,7 +125,7 @@ class KuaishouauthorScheduling:
                 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
                 'Host': 'www.kuaishou.com',
                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-                'Referer': f'https://www.kuaishou.com/profile/{user_dict["link"].replace("https://www.kuaishou.com/profile/", "")}',
+                'Referer': f'https://www.kuaishou.com/profile/{link.replace("https://www.kuaishou.com/profile/", "")}',
                 'Accept-Encoding': 'gzip, deflate, br',
                 'Connection': 'keep-alive'
             }
@@ -198,35 +221,20 @@ class KuaishouauthorScheduling:
                         video_height = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("adaptationSet", {})[0].get("representation", {})[0].get("height", 0)
                     publish_time_stamp = int(int(feeds[i].get('photo', {}).get('timestamp', 0)) / 1000)
                     publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    date_three_days_ago_string = (date.today() + timedelta(days=-5)).strftime("%Y-%m-%d %H:%M:%S")
-                    rule = publish_time_str > date_three_days_ago_string
-                    if i > 2:
-                        if rule == False:
-                            break
-                    if rule == False:
-                        Common.logger(log_type, crawler).info(f"发布时间小于5天,发布时间:{publish_time_str}\n")
+                    viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
+                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
+                    video_percent = '%.2f' % (realLikeCount / viewCount)
+                    special = float(rule_dict.get("special"))
+                    if float(video_percent) < special:
+                        Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent}\n")
                         AliyunLogger.logging(
                             code="2004",
                             platform=crawler,
                             mode=log_type,
                             env=env,
-                            message=f"发布时间小于5天,发布时间:{publish_time_str}\n"
+                            message=f"点赞量:{realLikeCount}\n"
                         )
                         continue
-                    viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
-                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
-                    video_percent = '%.2f' % (realLikeCount / viewCount)
-                    if viewCount < 100000:
-                        if float(video_percent) < 0.01:
-                            Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent},播放量-{viewCount}\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"点赞量:{realLikeCount}\n"
-                            )
-                            continue
                     video_dict = {'video_title': video_title,
                                   'video_id': video_id,
                                   'play_cnt': int(feeds[i].get('photo', {}).get('viewCount', 0)),
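
Analogously, the Kuaishou date cutoff above is replaced by a like-to-view ratio check against the same per-tier special value. A sketch under the same assumptions (the zero-view guard is added here; the patch divides by viewCount unconditionally):

def passes_like_view_ratio(real_like_count, view_count, rule_dict):
    # Keep the video only if like/view reaches the tier's "special" threshold,
    # mirroring: video_percent = '%.2f' % (realLikeCount / viewCount)
    special = float(rule_dict.get("special", 0.01))
    if view_count <= 0:
        return False  # guard added for this sketch; not present in the patch
    return float('%.2f' % (real_like_count / view_count)) >= special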