|
@@ -24,13 +24,13 @@ def get_feeds():
|
|
|
"""
|
|
|
1.从看一看+小程序首页推荐,获取视频列表
|
|
|
2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 中去重
|
|
|
- 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 中去重
|
|
|
- 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
+ 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 中去重
|
|
|
+ 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
|
|
|
"""
|
|
|
host = "https://search.weixin.qq.com"
|
|
|
url = '/cgi-bin/recwxa/recwxavideolist?'
|
|
|
video_list_session = Common.get_session()
|
|
|
- Common.crawler_log().info("获取视频list时,session:{}".format(video_list_session))
|
|
|
+ Common.logger().info("获取视频list时,session:{}", video_list_session)
|
|
|
header = {
|
|
|
"Connection": "keep-alive",
|
|
|
"content-type": "application/json",
|
|
@@ -60,12 +60,12 @@ def get_feeds():
|
|
|
response = json.loads(r.content.decode("utf8"))
|
|
|
|
|
|
if "data" not in response:
|
|
|
- Common.crawler_log().info("获取视频list时,session过期,随机睡眠 31-50 秒")
|
|
|
+ Common.logger().info("获取视频list时,session过期,随机睡眠 31-50 秒")
|
|
|
# Session expired ("data" missing from the response): sleep a random 31-40 seconds, then retry
|
|
|
time.sleep(random.randint(31, 40))
|
|
|
get_feeds()
|
|
|
elif "items" not in response["data"]:
|
|
|
- Common.crawler_log().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟")
|
|
|
+ Common.logger().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟")
|
|
|
# 如果返回空信息,则随机睡眠 1-3 分钟
|
|
|
time.sleep(random.randint(60, 180))
|
|
|
get_feeds()
|
|
@@ -74,11 +74,11 @@ def get_feeds():
|
|
|
for i in range(len(items)):
|
|
|
# 如果该视频没有视频信息,则忽略
|
|
|
if "videoInfo" not in items[i]:
|
|
|
- Common.crawler_log().info("无视频信息")
|
|
|
+ Common.logger().info("无视频信息")
|
|
|
else:
|
|
|
# 获取视频ID
|
|
|
video_id = items[i]["videoId"]
|
|
|
- Common.crawler_log().info('视频ID:{}'.format(video_id))
|
|
|
+ Common.logger().info('视频ID:{}', video_id)
|
|
|
|
|
|
# 获取视频标题
|
|
|
video_title = items[i]["title"].strip().replace("\n", "")\
|
|
@@ -87,48 +87,48 @@ def get_feeds():
|
|
|
.replace("?", "").replace('"', "").replace("<", "")\
|
|
|
.replace(">", "").replace("|", "").replace(" ", "")\
|
|
|
.replace("&NBSP", "").replace(".", "。").replace(" ", "")
|
|
|
- Common.crawler_log().info('视频标题:{}'.format(video_title))
|
|
|
+ Common.logger().info('视频标题:{}', video_title)
|
|
|
|
|
|
# 获取视频播放次数
|
|
|
video_play_cnt = items[i]["playCount"]
|
|
|
- Common.crawler_log().info('视频播放次数:{}'.format(video_play_cnt))
|
|
|
+ Common.logger().info('视频播放次数:{}', video_play_cnt)
|
|
|
|
|
|
# 获取视频点赞数
|
|
|
video_liked_cnt = items[i]["liked_cnt"]
|
|
|
- Common.crawler_log().info('视频点赞数:{}'.format(video_liked_cnt))
|
|
|
+ Common.logger().info('视频点赞数:{}', video_liked_cnt)
|
|
|
|
|
|
# 获取视频时长
|
|
|
video_duration = items[i]["mediaDuration"]
|
|
|
- Common.crawler_log().info('视频时长:{}秒'.format(video_duration))
|
|
|
+ Common.logger().info('视频时长:{}秒', video_duration)
|
|
|
|
|
|
# 获取视频评论数
|
|
|
video_comment_cnt = items[i]["comment_cnt"]
|
|
|
- Common.crawler_log().info('视频评论数:{}'.format(video_comment_cnt))
|
|
|
+ Common.logger().info('视频评论数:{}', video_comment_cnt)
|
|
|
|
|
|
# 获取视频分享数
|
|
|
video_shared_cnt = items[i]["shared_cnt"]
|
|
|
- Common.crawler_log().info('视频分享数:{}'.format(video_shared_cnt))
|
|
|
+ Common.logger().info('视频分享数:{}', video_shared_cnt)
|
|
|
|
|
|
# 获取视频发布时间
|
|
|
video_send_date = items[i]["date"]
|
|
|
- Common.crawler_log().info('视频发布时间:{}'.format(
|
|
|
- time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date))))
|
|
|
+ Common.logger().info("视频发布时间:{}",
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date)))
|
|
|
|
|
|
# 获取视频用户名
|
|
|
video_user = items[i]["source"].strip().replace("\n", "")
|
|
|
- Common.crawler_log().info('视频用户名:{}'.format(video_user))
|
|
|
+ Common.logger().info('视频用户名:{}', video_user)
|
|
|
|
|
|
# 获取视频宽高
|
|
|
if "short_video_info" not in items[i]:
|
|
|
video_width = "0"
|
|
|
video_height = "0"
|
|
|
video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- Common.crawler_log().info("无分辨率:{}".format(video_resolution))
|
|
|
+ Common.logger().info("无分辨率:{}", video_resolution)
|
|
|
elif len(items[i]["short_video_info"]) == 0:
|
|
|
video_width = "0"
|
|
|
video_height = "0"
|
|
|
video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- Common.crawler_log().info("无分辨率:{}".format(video_resolution))
|
|
|
+ Common.logger().info("无分辨率:{}", video_resolution)
|
|
|
|
|
|
else:
|
|
|
# 视频宽
|
|
@@ -136,34 +136,34 @@ def get_feeds():
|
|
|
# 视频高
|
|
|
video_height = items[i]["short_video_info"]["height"]
|
|
|
video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- Common.crawler_log().info('视频宽高:{}'.format(video_resolution))
|
|
|
+ Common.logger().info('视频宽高:{}', video_resolution)
|
|
|
|
|
|
# 获取视频用户头像
|
|
|
video_user_cover = items[i]["bizIcon"]
|
|
|
- Common.crawler_log().info('视频用户头像:{}'.format(video_user_cover))
|
|
|
+ Common.logger().info('视频用户头像:{}', video_user_cover)
|
|
|
|
|
|
# 获取视频封面
|
|
|
if "smartCoverUrl" in items[i]:
|
|
|
video_cover = items[i]["smartCoverUrl"]
|
|
|
- Common.crawler_log().info('视频封面:{}'.format(video_cover))
|
|
|
+ Common.logger().info('视频封面:{}', video_cover)
|
|
|
else:
|
|
|
video_cover = items[i]["thumbUrl"]
|
|
|
- Common.crawler_log().info('视频封面:{}'.format(video_cover))
|
|
|
+ Common.logger().info('视频封面:{}', video_cover)
|
|
|
|
|
|
# 获取播放地址
|
|
|
if "mpInfo" in items[i]["videoInfo"]["videoCdnInfo"].keys():
|
|
|
if len(items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
|
|
|
url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
|
|
|
- Common.crawler_log().info('视频播放地址:{}'.format(url))
|
|
|
+ Common.logger().info('视频播放地址:{}', url)
|
|
|
else:
|
|
|
url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
|
|
|
- Common.crawler_log().info('视频播放地址:{}'.format(url))
|
|
|
+ Common.logger().info('视频播放地址:{}', url)
|
|
|
elif "ctnInfo" in items[i]["videoInfo"]["videoCdnInfo"]:
|
|
|
url = items[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
|
|
|
- Common.crawler_log().info('视频播放地址:{}'.format(url))
|
|
|
+ Common.logger().info('视频播放地址:{}', url)
|
|
|
else:
|
|
|
url = items[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
|
|
|
- Common.crawler_log().info('视频播放地址:{}'.format(url))
|
|
|
+ Common.logger().info('视频播放地址:{}', url)
|
|
|
|
|
|
# 过滤无效视频
|
|
|
if video_id == "" \
|
|
@@ -178,27 +178,27 @@ def get_feeds():
|
|
|
or video_user_cover == "" \
|
|
|
or video_cover == "" \
|
|
|
or url == "":
|
|
|
- Common.crawler_log().info("无效视频")
|
|
|
+ Common.logger().info("无效视频")
|
|
|
else:
|
|
|
# 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
|
|
|
if video_id in [j for i in Feishu.get_values_batch("20ce0c") for j in i]:
|
|
|
- Common.crawler_log().info("该视频已下载:{}".format(video_title))
|
|
|
+ Common.logger().info("该视频已下载:{}", video_title)
|
|
|
else:
|
|
|
- Common.crawler_log().info("该视频未下载:{}".format(video_title))
|
|
|
+ Common.logger().info("该视频未下载:{}", video_title)
|
|
|
|
|
|
- # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
- if video_id in [j for i in Feishu.get_values_batch("Y8N3Vl") for j in i]:
|
|
|
- Common.crawler_log().info("该视频已在kanyikan_feeds_1中:{}".format(video_title))
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
|
|
|
+ if video_id in [j for i in Feishu.get_values_batch("SdCHOM") for j in i]:
|
|
|
+ Common.logger().info("该视频已在kanyikan_feeds_1中:{}", video_title)
|
|
|
else:
|
|
|
- Common.crawler_log().info("添加该视频信息至kanyikan_feeds_1:{}".format(video_title))
|
|
|
+ Common.logger().info("添加该视频信息至kanyikan_feeds_1:{}", video_title)
|
|
|
|
|
|
# 看一看+工作表,插入首行
|
|
|
- print(Feishu.insert_columns("Y8N3Vl"))
|
|
|
+ print(Feishu.insert_columns("SdCHOM"))
|
|
|
|
|
|
# 获取当前时间
|
|
|
get_feeds_time = int(time.time())
|
|
|
# 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
|
|
|
- Feishu.update_values("Y8N3Vl",
|
|
|
+ Feishu.update_values("SdCHOM",
|
|
|
a1=str(get_feeds_time),
|
|
|
b1=str(video_id),
|
|
|
c1=str(video_play_cnt),
|
|
@@ -215,7 +215,7 @@ def get_feeds():
|
|
|
n1=str(url),
|
|
|
o1=str(video_list_session))
|
|
|
except Exception as e:
|
|
|
- Common.crawler_log().error("获取视频 list 时异常:{}".format(e))
|
|
|
+ Common.logger().exception("获取视频 list 时异常:{}", e)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|