
Douyin / Kuaishou: per-tier rule filtering

zhangyong 1 year ago
parent
current commit
12e79e190d

+ 29 - 22
douyin/douyin_author/douyin_author_scheduling_new.py

@@ -55,11 +55,33 @@ class DouyinauthorScheduling:
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         next_cursor = 0
         while True:
+            flag = user_dict["link"].split("_")[0]
+            if flag == "V1":
+                rule_dict = {
+                    "play_cnt": {"min": 100000, "max": 0},
+                    'period': {"min": 15, "max": 15},
+                    'special': 0.03
+                }
+            elif flag == "V2":
+                rule_dict = {
+                    "play_cnt": {"min": 80000, "max": 0},
+                    'period': {"min": 7, "max": 7},
+                    'special': 0.02
+                }
+            elif flag == "V3":
+                rule_dict = {
+                    "play_cnt": {"min": 10000, "max": 0},
+                    'period': {"min": 3, "max": 3},
+                    'special': 0.01
+                }
             cookie = cls.get_cookie(log_type, crawler, env)["cookie"]
-
+            if user_dict['link'][0] == "V":
+                link = user_dict["link"][3:]
+            else:
+                link = user_dict["link"]
             time.sleep(random.randint(5, 10))
             url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
-            account_id = user_dict["link"]
+            account_id = link
             headers = {
                 'Accept': 'application/json, text/plain, */*',
                 'Accept-Language': 'zh-CN,zh;q=0.9',
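
For reference, the branches added above map the prefix of user_dict["link"] to a filtering rule and then strip that prefix before the link is used as the account id. A minimal standalone sketch of that logic (the helper names and the fallback tier are assumptions made for this sketch; the patch itself has no else branch, so rule_dict stays unset for links without a "V1"/"V2"/"V3" prefix):

# Sketch of the tier mapping added in this commit; helper names and the
# default fallback are assumptions, not part of the patch.
TIER_RULES = {
    "V1": {"play_cnt": {"min": 100000, "max": 0}, "period": {"min": 15, "max": 15}, "special": 0.03},
    "V2": {"play_cnt": {"min": 80000, "max": 0}, "period": {"min": 7, "max": 7}, "special": 0.02},
    "V3": {"play_cnt": {"min": 10000, "max": 0}, "period": {"min": 3, "max": 3}, "special": 0.01},
}

def get_rule_dict(link):
    flag = link.split("_")[0]                      # e.g. "V1_<account_id>" -> "V1"
    return TIER_RULES.get(flag, TIER_RULES["V3"])  # assumed default; the patch has no else branch

def strip_tier_prefix(link):
    # "V1_<account_id>" -> "<account_id>"; links without a tier prefix pass through
    return link[3:] if link.startswith("V") else link
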
@@ -144,33 +166,18 @@ class DouyinauthorScheduling:
                         comment_count = int(data[i].get('statistics').get('comment_count'))  # 评论
                         # collect_count = data[i].get('statistics').get('collect_count')  # 收藏
                         share_count = int(data[i].get('statistics').get('share_count'))  # 转发
-                        date_three_days_ago_string = (date.today() + timedelta(days=-5)).strftime("%Y-%m-%d %H:%M:%S")
-                        rule = publish_time_str > date_three_days_ago_string
-                        if i > 2:
-                            if rule == False:
-                                break
-                        if rule == False:
-                            Common.logger(log_type, crawler).info(f"发布时间小于5天,发布时间:{publish_time_str}\n")
+                        video_percent = '%.2f' % (share_count / digg_count)
+                        special = float(rule_dict.get("special"))
+                        if float(video_percent) < special:
+                            Common.logger(log_type, crawler).info(f"不符合条件:分享/点赞-{video_percent}\n")
                             AliyunLogger.logging(
                                 code="2004",
                                 platform=crawler,
                                 mode=log_type,
                                 env=env,
-                                message=f"发布时间小于5天,发布时间:{publish_time_str}\n"
+                                message=f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n"
                             )
                             continue
-                        video_percent = '%.2f' % (share_count / digg_count)
-                        if digg_count < 50000 and digg_count < 50:
-                            if float(video_percent) < 0.01:
-                                Common.logger(log_type, crawler).info(f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n")
-                                AliyunLogger.logging(
-                                    code="2004",
-                                    platform=crawler,
-                                    mode=log_type,
-                                    env=env,
-                                    message=f"不符合条件:分享/点赞-{video_percent},点赞量-{digg_count}\n"
-                                )
-                                continue
                         video_dict = {'video_title': video_title,
                                       'video_id': video_id,
                                       'play_cnt': 0,
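
The hunk above replaces the old 5-day publish-time cutoff with a share-to-like ratio check against the tier's special threshold. A hedged sketch of that condition on its own (the zero-like guard is added here for safety; the patch divides by digg_count directly):

def passes_share_like_ratio(share_count, digg_count, rule_dict):
    # Keep the video only if share/like reaches the tier's "special" threshold,
    # mirroring: video_percent = '%.2f' % (share_count / digg_count)
    special = float(rule_dict.get("special", 0.01))
    if digg_count <= 0:
        return False  # guard added for this sketch; not present in the patch
    return float('%.2f' % (share_count / digg_count)) >= special

With the V2 rule (special = 0.02), for example, 1,600 shares on 100,000 likes (ratio 0.02) passes, while 1,000 shares (ratio 0.01) is skipped and logged under code 2004.
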

+ 32 - 24
kuaishou/kuaishou_author/kuaishou_author_scheduling_new.py

@@ -81,12 +81,35 @@ class KuaishouauthorScheduling:
         pcursor = ""
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         while True:
+            flag = user_dict["link"].split("_")[0]
+            if flag == "V1":
+                rule_dict = {
+                    "play_cnt": {"min": 100000, "max": 0},
+                    'period': {"min": 15, "max": 15},
+                    'special': 0.03
+                }
+            elif flag == "V2":
+                rule_dict = {
+                    "play_cnt": {"min": 80000, "max": 0},
+                    'period': {"min": 7, "max": 7},
+                    'special': 0.02
+                }
+            elif flag == "V3":
+                rule_dict = {
+                    "play_cnt": {"min": 10000, "max": 0},
+                    'period': {"min": 3, "max": 3},
+                    'special': 0.01
+                }
             time.sleep(random.randint(10, 50))
             url = "https://www.kuaishou.com/graphql"
+            if user_dict['link'][0] == "V":
+                link = user_dict["link"][3:]
+            else:
+                link = user_dict["link"]
             payload = json.dumps({
                 "operationName": "visionProfilePhotoList",
                 "variables": {
-                    "userId": user_dict["link"].replace("https://www.kuaishou.com/profile/", ""),
+                    "userId": str(link.replace("https://www.kuaishou.com/profile/", "")),
                     "pcursor": pcursor,
                     "page": "profile"
                 },
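
The payload change above funnels the possible link formats into a single GraphQL userId. A small sketch of that normalization (the function name is an assumption for this sketch):

def to_kuaishou_user_id(link):
    # Accepts "V2_3xabc...", "V2_https://www.kuaishou.com/profile/3xabc...",
    # a bare profile URL, or a bare user id.
    if link.startswith("V"):
        link = link[3:]  # drop the "Vn_" tier prefix, as the patch does
    return link.replace("https://www.kuaishou.com/profile/", "")
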
@@ -102,7 +125,7 @@ class KuaishouauthorScheduling:
                 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
                 'Host': 'www.kuaishou.com',
                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-                'Referer': f'https://www.kuaishou.com/profile/{user_dict["link"].replace("https://www.kuaishou.com/profile/", "")}',
+                'Referer': f'https://www.kuaishou.com/profile/{link.replace("https://www.kuaishou.com/profile/", "")}',
                 'Accept-Encoding': 'gzip, deflate, br',
                 'Connection': 'keep-alive'
             }
@@ -198,35 +221,20 @@ class KuaishouauthorScheduling:
                         video_height = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("adaptationSet", {})[0].get("representation", {})[0].get("height", 0)
                     publish_time_stamp = int(int(feeds[i].get('photo', {}).get('timestamp', 0)) / 1000)
                     publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    date_three_days_ago_string = (date.today() + timedelta(days=-5)).strftime("%Y-%m-%d %H:%M:%S")
-                    rule = publish_time_str > date_three_days_ago_string
-                    if i > 2:
-                        if rule == False:
-                            break
-                    if rule == False:
-                        Common.logger(log_type, crawler).info(f"发布时间小于5天,发布时间:{publish_time_str}\n")
+                    viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
+                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
+                    video_percent = '%.2f' % (realLikeCount / viewCount)
+                    special = float(rule_dict.get("special"))
+                    if float(video_percent) < special:
+                        Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent}\n")
                         AliyunLogger.logging(
                             code="2004",
                             platform=crawler,
                             mode=log_type,
                             env=env,
-                            message=f"发布时间小于5天,发布时间:{publish_time_str}\n"
+                            message=f"点赞量:{realLikeCount}\n"
                         )
                         continue
-                    viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
-                    realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
-                    video_percent = '%.2f' % (realLikeCount / viewCount)
-                    if viewCount < 100000:
-                        if float(video_percent) < 0.01:
-                            Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent},播放量-{viewCount}\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"点赞量:{realLikeCount}\n"
-                            )
-                            continue
                     video_dict = {'video_title': video_title,
                                   'video_id': video_id,
                                   'play_cnt': int(feeds[i].get('photo', {}).get('viewCount', 0)),
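
Analogously, the Kuaishou date cutoff above is replaced by a like-to-view ratio check against the same per-tier special value. A sketch under the same assumptions (the zero-view guard is added here; the patch divides by viewCount unconditionally):

def passes_like_view_ratio(real_like_count, view_count, rule_dict):
    # Keep the video only if like/view reaches the tier's "special" threshold,
    # mirroring: video_percent = '%.2f' % (realLikeCount / viewCount)
    special = float(rule_dict.get("special", 0.01))
    if view_count <= 0:
        return False  # guard added for this sketch; not present in the patch
    return float('%.2f' % (real_like_count / view_count)) >= special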