Pārlūkot izejas kodu

看一看plus调试

zhangyong 1 gadu atpakaļ
vecāks
revīzija
4a27ce7b97
1 mainīts fails ar 218 papildinājumiem un 125 dzēšanām
  1. 218 125
      kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

+ 218 - 125
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -45,6 +45,7 @@ class KanyikanRecommend:
             if session is None:
                 time.sleep(1)
                 cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
+            sharesearchid = 0
             for i in range(20):
                 url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
                 vid = random.choice(
@@ -58,20 +59,22 @@ class KanyikanRecommend:
                     ["200201", "200", "208", "208201"])
                 switchnewuser = random.choice(
                     ["0", "1"])
+                isFromUgc = random.choice(
+                    ["false", "true"])
                 switchprofile = random.choice(
                     ["0", "1"])
                 subscene = random.choice(
-                    ["1089", "1074", "208"])
+                    ["1089", "1074", "208", "1007", "1008"])
                 params = random.choice([{
                     'session': session,
                     "offset": 0,
-                    "wxaVersion": "3.9.2",
+                    "wxaVersion": "3.17.12",
                     "count": "10",
                     "channelid": channelid,
                     "scene": '310',
                     "subscene": subscene,
-                    "clientVersion": '8.0.18',
-                    "sharesearchid": '0',
+                    "clientVersion": '3.8.6',
+                    "sharesearchid": sharesearchid,
                     "nettype": 'wifi',
                     "switchprofile": switchprofile,
                     "switchnewuser": switchnewuser,
@@ -84,13 +87,13 @@ class KanyikanRecommend:
                     "count": "15",
                     "scene": '310',
                     "subscene": subscene,
-                    "model": "MacBookPro14%2C111.6.7",
-                    "nettype": 'wifi',
-                    "clientVersion": '3.5.5',
-                    "sharesearchid": '0',
+                    "model": "华为",
+                    "nettype": '4g',
+                    "clientVersion": '3.8.6',
+                    "sharesearchid": sharesearchid,
                     "presearchid": "17530764723864413041",
                     "sharesource": "0",
-                    "isFromUgc": "false",
+                    "isFromUgc": isFromUgc,
                     "ad": 0,
                     "switchprofile": switchprofile,
                     "switchnewuser": switchnewuser,
@@ -135,6 +138,7 @@ class KanyikanRecommend:
                     time.sleep(random.randint(60, 180))
                     cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
                 feeds = response.json().get("data", {}).get("items", "")
+                sharesearchid = response.json().get("searchid", {})
                 if feeds == "":
                     Common.logger(log_type, crawler).info(f"feeds:{feeds}")
                     Common.logging(log_type, crawler, env, f"feeds:{feeds}")
@@ -195,122 +199,211 @@ class KanyikanRecommend:
                         # 获取当前时间
                         current_time = datetime.now()
                         formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
-                        values = [[
-                            videoId,
-                            publish_time_str,
-                            video_title,
-                            feeds[i].get("playCount", 0),
-                            feeds[i].get("liked_cnt", 0),
-                            feeds[i].get("comment_cnt", 0),
-                            feeds[i].get("shared_cnt", 0),
-                            feeds[i].get("mediaDuration", 0),
-                            publish_time_str,
-                            formatted_time,
-                            feeds[i].get("thumbUrl", ""),
-                            video_url
-                        ]]
-                        Feishu.insert_columns('kanyikan', 'kanyikan', "yQzAil", "ROWS", 1, 2)
-                        time.sleep(0.5)
-                        Feishu.update_values('kanyikan', 'kanyikan', "yQzAil", "A2:Z2", values)
-                        # for k, v in video_dict.items():
-                        #     Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        # Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
-                        # AliyunLogger.logging(
-                        #     code="1000",
-                        #     platform=crawler,
-                        #     mode=log_type,
-                        #     env=env,
-                        #     message=f"{video_dict}\n"
-                        # )
-                        # video_percent = '%.2f' % (shared_cnt / playCount)
-                        # if float(video_percent) < 0.05:
-                        #     Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
-                        #     Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
-                        #     AliyunLogger.logging(
-                        #         code="2004",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message=f"不符合抓取条件,分享/播放:{video_percent}\n"
-                        #     )
-                        #     continue
-                        # elif shared_cnt < 800:
-                        #     Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
-                        #     Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
-                        #     AliyunLogger.logging(
-                        #         code="2004",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message=f"不符合抓取条件,播放量:{playCount}\n"
-                        #     )
-                        #     continue
-                        # if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
-                        #     Common.logger(log_type, crawler).info("无效视频\n")
-                        #     Common.logging(log_type, crawler, env, "无效视频\n")
-                        #     AliyunLogger.logging(
-                        #         code="2004",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message=f"无效视频"
-                        #     )
-                        # elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
-                        #     Common.logger(log_type, crawler).info("不满足抓取规则\n")
-                        #     Common.logging(log_type, crawler, env, "不满足抓取规则\n")
-                        #     AliyunLogger.logging(
-                        #         code="2004",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message='不满足抓取规则\n'
-                        #     )
-                        # elif any(str(word) if str(word) in video_dict["video_title"] else False
-                        #          for word in get_config_from_mysql(log_type=log_type,
-                        #                                            source=crawler,
-                        #                                            env=env,
-                        #                                            text="filter",
-                        #                                            action="")) is True:
-                        #     Common.logger(log_type, crawler).info('已中过滤词\n')
-                        #     Common.logging(log_type, crawler, env, '已中过滤词\n')
-                        #     AliyunLogger.logging(
-                        #         code="2004",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message='已中过滤词\n'
-                        #     )
-                        # elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
-                        #     Common.logger(log_type, crawler).info('视频已下载\n')
-                        #     Common.logging(log_type, crawler, env, '视频已下载\n')
-                        #     AliyunLogger.logging(
-                        #         code="2002",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message='视频已下载\n'
-                        #     )
-                        #
-                        # else:
-                        #     video_dict["out_user_id"] = video_dict["user_id"]
-                        #     video_dict["platform"] = crawler
-                        #     video_dict["strategy"] = log_type
-                        #     video_dict["strategy_type"] = "data"
-                        #     video_dict["out_video_id"] = video_dict["video_id"]
-                        #     video_dict["width"] = video_dict["video_width"]
-                        #     video_dict["height"] = video_dict["video_height"]
-                        #     video_dict["crawler_rule"] = json.dumps(rule_dict)
-                        #     video_dict["user_id"] = our_uid
-                        #     video_dict["publish_time"] = video_dict["publish_time_str"]
-                        #     cls.insert_video_id(log_type, crawler, video_id, env)
-                        #     AliyunLogger.logging(
-                        #         code="1010",
-                        #         platform=crawler,
-                        #         mode=log_type,
-                        #         env=env,
-                        #         message=f"看一看video_id:{video_id}入库",
-                        #     )
-                        #     mq.send_msg(video_dict)
+
+                        for k, v in video_dict.items():
+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                        AliyunLogger.logging(
+                            code="1000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"{video_dict}\n"
+                        )
+
+                        video_percent = '%.2f' % (shared_cnt / playCount)
+                        if float(video_percent) < 0.1:
+                            Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
+                            Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"不符合抓取条件,分享/播放:{video_percent}\n"
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "否",
+                                f"不符合抓取条件,分享/播放:{video_percent}"
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+                            continue
+
+                        if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
+                            Common.logger(log_type, crawler).info("无效视频\n")
+                            Common.logging(log_type, crawler, env, "无效视频\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"无效视频"
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "否",
+                                f"无效视频"
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+                        elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
+                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='不满足抓取规则\n'
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "否",
+                                f"不满足抓取规则"
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+                        elif any(str(word) if str(word) in video_dict["video_title"] else False
+                                 for word in get_config_from_mysql(log_type=log_type,
+                                                                   source=crawler,
+                                                                   env=env,
+                                                                   text="filter",
+                                                                   action="")) is True:
+                            Common.logger(log_type, crawler).info('已中过滤词\n')
+                            Common.logging(log_type, crawler, env, '已中过滤词\n')
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='已中过滤词\n'
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "否",
+                                f"已中过滤词"
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            Common.logging(log_type, crawler, env, '视频已下载\n')
+                            AliyunLogger.logging(
+                                code="2002",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='视频已下载\n'
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "否",
+                                f"视频已下载"
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+
+                        else:
+                            video_dict["out_user_id"] = video_dict["user_id"]
+                            video_dict["platform"] = crawler
+                            video_dict["strategy"] = log_type
+                            video_dict["strategy_type"] = "data"
+                            video_dict["out_video_id"] = video_dict["video_id"]
+                            video_dict["width"] = video_dict["video_width"]
+                            video_dict["height"] = video_dict["video_height"]
+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
+                            video_dict["user_id"] = our_uid
+                            video_dict["publish_time"] = video_dict["publish_time_str"]
+                            cls.insert_video_id(log_type, crawler, video_id, env)
+                            AliyunLogger.logging(
+                                code="1010",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"看一看video_id:{video_id}入库",
+                            )
+                            values = [[
+                                videoId,
+                                video_title,
+                                feeds[i].get("playCount", 0),
+                                feeds[i].get("liked_cnt", 0),
+                                feeds[i].get("comment_cnt", 0),
+                                feeds[i].get("shared_cnt", 0),
+                                feeds[i].get("mediaDuration", 0),
+                                publish_time_str,
+                                formatted_time,
+                                feeds[i].get("thumbUrl", ""),
+                                video_url,
+                                f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
+                                "是",
+                                ""
+                            ]]
+                            Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
+                            mq.send_msg(video_dict)
                         time.sleep(random.randint(10, 15))
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")