zhangyong 1 éve
szülő
commit
26e10f8bf5

+ 20 - 15
kuaishou/kuaishou_author/kuaishou_author_scheduling_new.py

@@ -86,6 +86,7 @@ class KuaishouauthorScheduling:
         pcursor = ""
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         count = 0
+        special = 0
         for i in range(5):
         # while True:
             if count > 5:
@@ -99,20 +100,20 @@ class KuaishouauthorScheduling:
                     rule_dict = {
                         "play_cnt": {"min": 10000, "max": 0},
                         'period': {"min": 90, "max": 90},
-                        'special': 0.01
                     }
+                    special = 0.01
                 elif flag == "V2":
                     rule_dict = {
                         "play_cnt": {"min": 2000, "max": 0},
                         'period': {"min": 90, "max": 90},
-                        'special': 0.01
                     }
+                    special = 0.01
                 elif flag == "V3":
                     rule_dict = {
                         "play_cnt": {"min": 100, "max": 0},
                         'period': {"min": 90, "max": 90},
-                        'special': 0.01
                     }
+                    special = 0.01
             else:
                 link = user_dict["link"]
 
@@ -131,6 +132,7 @@ class KuaishouauthorScheduling:
                 'Content-Type': 'application/json',
                 'Origin': 'https://www.kuaishou.com',
                 'Cookie': cookie,
+                # 'Cookie': "kpf=PC_WEB; clientid=3; did=web_b7830efe78a1e48daacb126b6f52ad05; userId=1299331643; didv=1711610963000; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABsqe9eHH9ZkvQs7beNfwe3IobrRFI_3HvvYRx7BGcVwVImonsmZgPXWpHF89uY-Fomry6l9EtimGUwt4EEF0xMeyMauj7ZvMndNTvjG8qro9yB-xtc6_iN0a6-peDQz6zxeUs6gQLkm58NFtCTDGsqWqMDs1ruDWlJiy-1GVqT59GI_AoJgukwXtTEaYFKzDSmUWv7qsJ0Ya1gDlsyPFO0hoSoJCKbxHIWXjzVWap_gGna5KjIiCdD9i3Uy2z2XGYS--wpIFP_h8k_AasD37nnju7rv_ocSgFMAE; kuaishou.server.web_ph=76097deb523bd30b37fa3d9f67200e5e4e72; kpn=KUAISHOU_VISION",
                 'Content-Length': '1260',
                 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
                 'Host': 'www.kuaishou.com',
@@ -233,11 +235,13 @@ class KuaishouauthorScheduling:
                         video_height = feeds[i].get("photo", {}).get("videoResource").get("hevc", {}).get("adaptationSet", {})[0].get("representation", {})[0].get("height", 0)
                     publish_time_stamp = int(int(feeds[i].get('photo', {}).get('timestamp', 0)) / 1000)
                     publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+                    profile_userTop_photo = feeds[i].get('photo', {}).get('profileUserTopPhoto', '')
+
                     viewCount = int(feeds[i].get('photo', {}).get('viewCount', 0))
                     realLikeCount = int(feeds[i].get('photo', {}).get('realLikeCount', 0))
                     video_percent = '%.2f' % (realLikeCount / viewCount)
-                    if "special" in rule_dict:
-                        special = float(rule_dict.get("special"))
+                    if special != 0:
+                        special = float(special)
                         if float(video_percent) < special:
                             Common.logger(log_type, crawler).info(f"不符合条件:点赞/播放-{video_percent}\n")
                             AliyunLogger.logging(
@@ -274,16 +278,17 @@ class KuaishouauthorScheduling:
                         env=env,
                         message=f"{video_dict}\n"
                     )
-                    if int((int(time.time()) - int(publish_time_stamp)) / (3600*24)) > int(rule_dict.get("period", {}).get("max", 1000)):
-                        Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
-                        AliyunLogger.logging(
-                            code="2004",
-                            platform=crawler,
-                            mode=log_type,
-                            env=env,
-                            message=f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n'
-                        )
-                        return
+                    if profile_userTop_photo != True:
+                        if int((int(time.time()) - int(publish_time_stamp)) / (3600*24)) > int(rule_dict.get("period", {}).get("max", 1000)):
+                            Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n'
+                            )
+                            return
                     if video_dict["video_id"] == '' or video_dict["cover_url"] == '' or video_dict["video_url"] == '':
                         Common.logger(log_type, crawler).info('无效视频\n')
                         AliyunLogger.logging(

+ 2 - 2
kuaishou/kuaishou_main/run_ks_author_dev.py

@@ -17,7 +17,7 @@ def kuaishou_recommend_main(log_type, crawler, env):
                                                           "like_cnt": {"min": 100, "max": 0},
                                                           "duration": {"min": 30, "max": 0},
                                                           "period": {"min": 365, "max": 365}},
-                                               user_list=[{"uid": 6267140, "source": "kuaishou", "link": "3xratgemedr8ctq",
+                                               user_list=[{"uid": 6267140, "source": "kuaishou", "link": "V2_https://www.kuaishou.com/profile/3xx43zn5t7m64qa",
                                                            "nick_name": "买两个橘子",
                                                            "avatar_url": "http://rescdn.yishihui.com/user/default/avatar/live/1616555578819_u=1922778943,2660693611&fm=26&gp=0.jpg",
                                                            "mode": "author"},
@@ -32,4 +32,4 @@ def kuaishou_recommend_main(log_type, crawler, env):
 
 
 if __name__ == "__main__":
-    kuaishou_recommend_main("author", "kuaishou", "prod")
+    kuaishou_recommend_main("author", "kuaishou", "dev")