|
@@ -81,12 +81,35 @@ class KuaishouauthorScheduling:
|
|
|
pcursor = ""
|
|
|
mq = MQ(topic_name="topic_crawler_etl_" + env)
|
|
|
while True:
|
|
|
+ flag = user_dict["link"].split("_")[0]
|
|
|
+ if flag == "V1":
|
|
|
+ rule_dict = {
|
|
|
+ "play_cnt": {"min": 100000, "max": 0},
|
|
|
+ 'period': {"min": 15, "max": 15},
|
|
|
+ 'special': 0.03
|
|
|
+ }
|
|
|
+ elif flag == "V2":
|
|
|
+ rule_dict = {
|
|
|
+ "play_cnt": {"min": 80000, "max": 0},
|
|
|
+ 'period': {"min": 7, "max": 7},
|
|
|
+ 'special': 0.02
|
|
|
+ }
|
|
|
+ elif flag == "V3":
|
|
|
+ rule_dict = {
|
|
|
+ "play_cnt": {"min": 10000, "max": 0},
|
|
|
+ 'period': {"min": 3, "max": 3},
|
|
|
+ 'special': 0.01
|
|
|
+ }
|
|
|
time.sleep(random.randint(10, 50))
|
|
|
url = "https://www.kuaishou.com/graphql"
|
|
|
+ if user_dict['link'][0] == "V":
|
|
|
+ link = user_dict["link"][3:]
|
|
|
+ else:
|
|
|
+ link = user_dict["link"]
|
|
|
payload = json.dumps({
|
|
|
"operationName": "visionProfilePhotoList",
|
|
|
"variables": {
|
|
|
- "userId": user_dict["link"].replace("https://www.kuaishou.com/profile/", ""),
|
|
|
+ "userId": str(link.replace("https://www.kuaishou.com/profile/", "")),
|
|
|
"pcursor": pcursor,
|
|
|
"page": "profile"
|
|
|
},
|
|
@@ -102,7 +125,7 @@ class KuaishouauthorScheduling:
|
|
|
'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
|
'Host': 'www.kuaishou.com',
|
|
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
|
|
|
- 'Referer': f'https://www.kuaishou.com/profile/{user_dict["link"].replace("https://www.kuaishou.com/profile/", "")}',
|
|
|
+ 'Referer': f'https://www.kuaishou.com/profile/{link.replace("https://www.kuaishou.com/profile/", "")}',
|
|
|
'Accept-Encoding': 'gzip, deflate, br',
|
|
|
'Connection': 'keep-alive'
|
|
|
}
|
|
@@ -198,35 +221,20 @@ class KuaishouauthorScheduling:
|
|
|
video_height = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("adaptationSet", {})[0].get("representation", {})[0].get("height", 0)
|
|
|
publish_time_stamp = int(int(feeds[i].get('photo', {}).get('timestamp', 0)) / 1000)
|
|
|
publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
|
|
|
- date_three_days_ago_string = (date.today() + timedelta(days=-5)).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
- rule = publish_time_str > date_three_days_ago_string
|
|
|
- if i > 2:
|
|
|
- if rule == False:
|
|
|
- break
|
|
|
- if rule == False:
|
|
|
- Common.logger(log_type, crawler).info(f"发布时间小于5天,发布时间:{publish_time_str}\n")
|
|
|
+ viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
|
|
|
+ realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
|
|
|
+ video_percent = '%.2f' % (realLikeCount / viewCount)
|
|
|
+ special = float(rule_dict.get("special"))
|
|
|
+ if float(video_percent) < special:
|
|
|
+ Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent}\n")
|
|
|
AliyunLogger.logging(
|
|
|
code="2004",
|
|
|
platform=crawler,
|
|
|
mode=log_type,
|
|
|
env=env,
|
|
|
- message=f"发布时间小于5天,发布时间:{publish_time_str}\n"
|
|
|
+ message=f"点赞量:{realLikeCount}\n"
|
|
|
)
|
|
|
continue
|
|
|
- viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
|
|
|
- realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
|
|
|
- video_percent = '%.2f' % (realLikeCount / viewCount)
|
|
|
- if viewCount < 100000:
|
|
|
- if float(video_percent) < 0.01:
|
|
|
- Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent},播放量-{viewCount}\n")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"点赞量:{realLikeCount}\n"
|
|
|
- )
|
|
|
- continue
|
|
|
video_dict = {'video_title': video_title,
|
|
|
'video_id': video_id,
|
|
|
'play_cnt': int(feeds[i].get('photo', {}).get('viewCount', 0)),
|