|
@@ -86,6 +86,7 @@ class KuaishouauthorScheduling:
|
|
|
pcursor = ""
|
|
|
mq = MQ(topic_name="topic_crawler_etl_" + env)
|
|
|
count = 0
|
|
|
+ special = 0
|
|
|
for i in range(5):
|
|
|
# while True:
|
|
|
if count > 5:
|
|
@@ -99,20 +100,20 @@ class KuaishouauthorScheduling:
|
|
|
rule_dict = {
|
|
|
"play_cnt": {"min": 10000, "max": 0},
|
|
|
'period': {"min": 90, "max": 90},
|
|
|
- 'special': 0.01
|
|
|
}
|
|
|
+ special = 0.01
|
|
|
elif flag == "V2":
|
|
|
rule_dict = {
|
|
|
"play_cnt": {"min": 2000, "max": 0},
|
|
|
'period': {"min": 90, "max": 90},
|
|
|
- 'special': 0.01
|
|
|
}
|
|
|
+ special = 0.01
|
|
|
elif flag == "V3":
|
|
|
rule_dict = {
|
|
|
"play_cnt": {"min": 100, "max": 0},
|
|
|
'period': {"min": 90, "max": 90},
|
|
|
- 'special': 0.01
|
|
|
}
|
|
|
+ special = 0.01
|
|
|
else:
|
|
|
link = user_dict["link"]
|
|
|
|
|
@@ -131,6 +132,7 @@ class KuaishouauthorScheduling:
|
|
|
'Content-Type': 'application/json',
|
|
|
'Origin': 'https://www.kuaishou.com',
|
|
|
'Cookie': cookie,
|
|
|
+ # 'Cookie': "kpf=PC_WEB; clientid=3; did=web_b7830efe78a1e48daacb126b6f52ad05; userId=1299331643; didv=1711610963000; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABsqe9eHH9ZkvQs7beNfwe3IobrRFI_3HvvYRx7BGcVwVImonsmZgPXWpHF89uY-Fomry6l9EtimGUwt4EEF0xMeyMauj7ZvMndNTvjG8qro9yB-xtc6_iN0a6-peDQz6zxeUs6gQLkm58NFtCTDGsqWqMDs1ruDWlJiy-1GVqT59GI_AoJgukwXtTEaYFKzDSmUWv7qsJ0Ya1gDlsyPFO0hoSoJCKbxHIWXjzVWap_gGna5KjIiCdD9i3Uy2z2XGYS--wpIFP_h8k_AasD37nnju7rv_ocSgFMAE; kuaishou.server.web_ph=76097deb523bd30b37fa3d9f67200e5e4e72; kpn=KUAISHOU_VISION",
|
|
|
'Content-Length': '1260',
|
|
|
'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
|
'Host': 'www.kuaishou.com',
|
|
@@ -233,11 +235,13 @@ class KuaishouauthorScheduling:
|
|
|
video_height = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("adaptationSet", {})[0].get("representation", {})[0].get("height", 0)
|
|
|
publish_time_stamp = int(int(feeds[i].get('photo', {}).get('timestamp', 0)) / 1000)
|
|
|
publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
|
|
|
+ profile_userTop_photo = feeds[i].get('photo', {}).get('profileUserTopPhoto', '')
|
|
|
+
|
|
|
viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
|
|
|
realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
|
|
|
video_percent = '%.2f' % (realLikeCount / viewCount)
|
|
|
- if "special" in rule_dict:
|
|
|
- special = float(rule_dict.get("special"))
|
|
|
+ if special != 0:
|
|
|
+ special = float(special)
|
|
|
if float(video_percent) < special:
|
|
|
Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent}\n")
|
|
|
AliyunLogger.logging(
|
|
@@ -274,16 +278,17 @@ class KuaishouauthorScheduling:
|
|
|
env=env,
|
|
|
message=f"{video_dict}\n"
|
|
|
)
|
|
|
- if int((int(time.time()) - int(publish_time_stamp)) / (3600*24)) > int(rule_dict.get("period", {}).get("max", 1000)):
|
|
|
- Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n'
|
|
|
- )
|
|
|
- return
|
|
|
+ if profile_userTop_photo != True:
|
|
|
+ if int((int(time.time()) - int(publish_time_stamp)) / (3600*24)) > int(rule_dict.get("period", {}).get("max", 1000)):
|
|
|
+ Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n'
|
|
|
+ )
|
|
|
+ return
|
|
|
if video_dict["video_id"] == '' or video_dict["cover_url"] == '' or video_dict["video_url"] == '':
|
|
|
Common.logger(log_type, crawler).info('无效视频\n')
|
|
|
AliyunLogger.logging(
|