|
@@ -45,6 +45,7 @@ class KanyikanRecommend:
|
|
|
if session is None:
|
|
|
time.sleep(1)
|
|
|
cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
|
|
|
+ sharesearchid = 0
|
|
|
for i in range(20):
|
|
|
url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
|
|
|
vid = random.choice(
|
|
@@ -58,20 +59,22 @@ class KanyikanRecommend:
|
|
|
["200201", "200", "208", "208201"])
|
|
|
switchnewuser = random.choice(
|
|
|
["0", "1"])
|
|
|
+ isFromUgc = random.choice(
|
|
|
+ ["false", "true"])
|
|
|
switchprofile = random.choice(
|
|
|
["0", "1"])
|
|
|
subscene = random.choice(
|
|
|
- ["1089", "1074", "208"])
|
|
|
+ ["1089", "1074", "208", "1007", "1008"])
|
|
|
params = random.choice([{
|
|
|
'session': session,
|
|
|
"offset": 0,
|
|
|
- "wxaVersion": "3.9.2",
|
|
|
+ "wxaVersion": "3.17.12",
|
|
|
"count": "10",
|
|
|
"channelid": channelid,
|
|
|
"scene": '310',
|
|
|
"subscene": subscene,
|
|
|
- "clientVersion": '8.0.18',
|
|
|
- "sharesearchid": '0',
|
|
|
+ "clientVersion": '3.8.6',
|
|
|
+ "sharesearchid": sharesearchid,
|
|
|
"nettype": 'wifi',
|
|
|
"switchprofile": switchprofile,
|
|
|
"switchnewuser": switchnewuser,
|
|
@@ -84,13 +87,13 @@ class KanyikanRecommend:
|
|
|
"count": "15",
|
|
|
"scene": '310',
|
|
|
"subscene": subscene,
|
|
|
- "model": "MacBookPro14%2C111.6.7",
|
|
|
- "nettype": 'wifi',
|
|
|
- "clientVersion": '3.5.5',
|
|
|
- "sharesearchid": '0',
|
|
|
+ "model": "华为",
|
|
|
+ "nettype": '4g',
|
|
|
+ "clientVersion": '3.8.6',
|
|
|
+ "sharesearchid": sharesearchid,
|
|
|
"presearchid": "17530764723864413041",
|
|
|
"sharesource": "0",
|
|
|
- "isFromUgc": "false",
|
|
|
+ "isFromUgc": isFromUgc,
|
|
|
"ad": 0,
|
|
|
"switchprofile": switchprofile,
|
|
|
"switchnewuser": switchnewuser,
|
|
@@ -135,6 +138,7 @@ class KanyikanRecommend:
|
|
|
time.sleep(random.randint(60, 180))
|
|
|
cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
|
|
|
feeds = response.json().get("data", {}).get("items", "")
|
|
|
+ sharesearchid = response.json().get("searchid", {})
|
|
|
if feeds == "":
|
|
|
Common.logger(log_type, crawler).info(f"feeds:{feeds}")
|
|
|
Common.logging(log_type, crawler, env, f"feeds:{feeds}")
|
|
@@ -195,122 +199,211 @@ class KanyikanRecommend:
|
|
|
# 获取当前时间
|
|
|
current_time = datetime.now()
|
|
|
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
- values = [[
|
|
|
- videoId,
|
|
|
- publish_time_str,
|
|
|
- video_title,
|
|
|
- feeds[i].get("playCount", 0),
|
|
|
- feeds[i].get("liked_cnt", 0),
|
|
|
- feeds[i].get("comment_cnt", 0),
|
|
|
- feeds[i].get("shared_cnt", 0),
|
|
|
- feeds[i].get("mediaDuration", 0),
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- feeds[i].get("thumbUrl", ""),
|
|
|
- video_url
|
|
|
- ]]
|
|
|
- Feishu.insert_columns('kanyikan', 'kanyikan', "yQzAil", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values('kanyikan', 'kanyikan', "yQzAil", "A2:Z2", values)
|
|
|
- # for k, v in video_dict.items():
|
|
|
- # Common.logger(log_type, crawler).info(f"{k}:{v}")
|
|
|
- # Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="1000",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message=f"{video_dict}\n"
|
|
|
- # )
|
|
|
- # video_percent = '%.2f' % (shared_cnt / playCount)
|
|
|
- # if float(video_percent) < 0.05:
|
|
|
- # Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
|
|
|
- # Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2004",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message=f"不符合抓取条件,分享/播放:{video_percent}\n"
|
|
|
- # )
|
|
|
- # continue
|
|
|
- # elif shared_cnt < 800:
|
|
|
- # Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
|
|
|
- # Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2004",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message=f"不符合抓取条件,播放量:{playCount}\n"
|
|
|
- # )
|
|
|
- # continue
|
|
|
- # if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
|
|
|
- # Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
- # Common.logging(log_type, crawler, env, "无效视频\n")
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2004",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message=f"无效视频"
|
|
|
- # )
|
|
|
- # elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
|
|
|
- # Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
- # Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2004",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message='不满足抓取规则\n'
|
|
|
- # )
|
|
|
- # elif any(str(word) if str(word) in video_dict["video_title"] else False
|
|
|
- # for word in get_config_from_mysql(log_type=log_type,
|
|
|
- # source=crawler,
|
|
|
- # env=env,
|
|
|
- # text="filter",
|
|
|
- # action="")) is True:
|
|
|
- # Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
- # Common.logging(log_type, crawler, env, '已中过滤词\n')
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2004",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message='已中过滤词\n'
|
|
|
- # )
|
|
|
- # elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
|
|
|
- # Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- # Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="2002",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message='视频已下载\n'
|
|
|
- # )
|
|
|
- #
|
|
|
- # else:
|
|
|
- # video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
- # video_dict["platform"] = crawler
|
|
|
- # video_dict["strategy"] = log_type
|
|
|
- # video_dict["strategy_type"] = "data"
|
|
|
- # video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
- # video_dict["width"] = video_dict["video_width"]
|
|
|
- # video_dict["height"] = video_dict["video_height"]
|
|
|
- # video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
- # video_dict["user_id"] = our_uid
|
|
|
- # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
- # cls.insert_video_id(log_type, crawler, video_id, env)
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="1010",
|
|
|
- # platform=crawler,
|
|
|
- # mode=log_type,
|
|
|
- # env=env,
|
|
|
- # message=f"看一看video_id:{video_id}入库",
|
|
|
- # )
|
|
|
- # mq.send_msg(video_dict)
|
|
|
+
|
|
|
+ for k, v in video_dict.items():
|
|
|
+ Common.logger(log_type, crawler).info(f"{k}:{v}")
|
|
|
+ Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f"{video_dict}\n"
|
|
|
+ )
|
|
|
+
|
|
|
+ video_percent = '%.2f' % (shared_cnt / playCount)
|
|
|
+ if float(video_percent) < 0.1:
|
|
|
+ Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
|
|
|
+ Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f"不符合抓取条件,分享/播放:{video_percent}\n"
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "否",
|
|
|
+ f"不符合抓取条件,分享/播放:{video_percent}"
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+ continue
|
|
|
+
|
|
|
+ if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
|
|
|
+ Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
+ Common.logging(log_type, crawler, env, "无效视频\n")
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f"无效视频"
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "否",
|
|
|
+ f"无效视频"
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+ elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
|
|
|
+ Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
+ Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='不满足抓取规则\n'
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "否",
|
|
|
+ f"不满足抓取规则"
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+ elif any(str(word) if str(word) in video_dict["video_title"] else False
|
|
|
+ for word in get_config_from_mysql(log_type=log_type,
|
|
|
+ source=crawler,
|
|
|
+ env=env,
|
|
|
+ text="filter",
|
|
|
+ action="")) is True:
|
|
|
+ Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
+ Common.logging(log_type, crawler, env, '已中过滤词\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='已中过滤词\n'
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "否",
|
|
|
+ f"已中过滤词"
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+ elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
|
|
|
+ Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
+ Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2002",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='视频已下载\n'
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "否",
|
|
|
+ f"视频已下载"
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+
|
|
|
+ else:
|
|
|
+ video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
+ video_dict["platform"] = crawler
|
|
|
+ video_dict["strategy"] = log_type
|
|
|
+ video_dict["strategy_type"] = "data"
|
|
|
+ video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
+ video_dict["width"] = video_dict["video_width"]
|
|
|
+ video_dict["height"] = video_dict["video_height"]
|
|
|
+ video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
+ video_dict["user_id"] = our_uid
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ cls.insert_video_id(log_type, crawler, video_id, env)
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1010",
|
|
|
+ platform=crawler,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f"看一看video_id:{video_id}入库",
|
|
|
+ )
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url,
|
|
|
+ f"channelid:{channelid},switchnewuser:{switchnewuser},sharesearchid:{sharesearchid},isFromUgc:{isFromUgc},switchprofile:{switchprofile},subscene:{subscene}",
|
|
|
+ "是",
|
|
|
+ ""
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "zS0vxs", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "zS0vxs", "A2:Z2", values)
|
|
|
+ mq.send_msg(video_dict)
|
|
|
time.sleep(random.randint(10, 15))
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
|