|
@@ -17,6 +17,8 @@ proxies = {"http": None, "https": None}
|
|
|
|
|
|
|
|
|
class Follow:
|
|
|
+ # 已抓取视频数量
|
|
|
+ get_video_count = []
|
|
|
# 小程序:关注列表翻页参数
|
|
|
follow_pcursor = ""
|
|
|
# 小程序:个人主页视频列表翻页参数
|
|
@@ -83,7 +85,7 @@ class Follow:
|
|
|
|
|
|
# 从小程序中,关注用户列表同步至云文档
|
|
|
@classmethod
|
|
|
- def get_sub_or_fans_list(cls, log_type):
|
|
|
+ def get_follow_users_to_feishu(cls, log_type):
|
|
|
try:
|
|
|
follow_list = []
|
|
|
follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "2OLxLr")
|
|
@@ -158,11 +160,11 @@ class Follow:
|
|
|
cls.follow_pcursor = r.json()["pcursor"]
|
|
|
# 翻页,直至到底了
|
|
|
if cls.follow_pcursor != "no_more":
|
|
|
- cls.get_sub_or_fans_list(log_type)
|
|
|
+ cls.get_follow_users_to_feishu(log_type)
|
|
|
else:
|
|
|
Common.logger(log_type).info("从小程序中同步关注用户至云文档完成\n")
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("从小程序中,关注用户列表同步至云文档异常:{}", e)
|
|
|
+ Common.logger(log_type).error("从小程序中,关注用户列表同步至云文档异常:{}\n", e)
|
|
|
|
|
|
# 从云文档获取关注用户列表
|
|
|
@classmethod
|
|
@@ -172,17 +174,14 @@ class Follow:
|
|
|
if len(follow_sheet) == 1:
|
|
|
Common.logger(log_type).info("暂无关注用户")
|
|
|
else:
|
|
|
- follow_list = []
|
|
|
- nick_list = []
|
|
|
+ follow_dict = {}
|
|
|
for i in range(1, len(follow_sheet)):
|
|
|
uid = follow_sheet[i][0]
|
|
|
nick = follow_sheet[i][1]
|
|
|
- nick_list.append(nick)
|
|
|
- follow_list.append(uid)
|
|
|
- Common.logger(log_type).info("关注用户列表:{}", nick_list)
|
|
|
- return follow_list
|
|
|
+ follow_dict[nick] = uid
|
|
|
+ return follow_dict
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("从云文档获取关注用户列表异常:{}", e)
|
|
|
+ Common.logger(log_type).error("从云文档获取关注用户列表异常:{}\n", e)
|
|
|
|
|
|
# 从云文档获取取消关注用户列表
|
|
|
@classmethod
|
|
@@ -270,7 +269,7 @@ class Follow:
|
|
|
|
|
|
# 获取个人主页视频
|
|
|
@classmethod
|
|
|
- def get_videos_from_person(cls, log_type, uid):
|
|
|
+ def get_user_videos(cls, log_type, uid):
|
|
|
try:
|
|
|
time.sleep(1)
|
|
|
url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/profile?"
|
|
@@ -322,10 +321,11 @@ class Follow:
|
|
|
if "feeds" not in r.json():
|
|
|
# Feishu.bot(log_type, "follow:get_videos_from_person:"+r.text)
|
|
|
Common.logger(log_type).warning("response:{}", r.text)
|
|
|
+ elif r.json()["feeds"] == 0:
|
|
|
+ Common.logger(log_type).warning("用户主页无视频\n")
|
|
|
+ return
|
|
|
else:
|
|
|
feeds = r.json()["feeds"]
|
|
|
- if len(feeds) == 0:
|
|
|
- Common.logger(log_type).warning("用户主页无视频\n")
|
|
|
for i in range(len(feeds)):
|
|
|
# 视频标题过滤话题及处理特殊字符
|
|
|
kuaishou_title = feeds[i]["caption"]
|
|
@@ -458,13 +458,16 @@ class Follow:
|
|
|
or user_id == "0" \
|
|
|
or video_title == "":
|
|
|
Common.logger(log_type).info("无效视频\n")
|
|
|
- # 视频发布时间 <= 7 天
|
|
|
- elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
|
|
|
- Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
|
|
|
- "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
|
|
|
- cls.person_pcursor = ""
|
|
|
- break
|
|
|
+ # # 视频发布时间 <= 7 天
|
|
|
+ # elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
|
|
|
+ # Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
|
|
|
+ # "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
|
|
|
+ # cls.person_pcursor = ""
|
|
|
+ # break
|
|
|
# 判断敏感词
|
|
|
+ elif cls.download_rule(video_duration, video_width, video_height, video_play_cnt,
|
|
|
+ video_like_cnt, video_share_cnt) is False:
|
|
|
+ Common.logger(log_type).info("不满足下载规则\n".format(kuaishou_title))
|
|
|
elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
|
|
|
Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
|
|
|
# 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
|
|
@@ -499,34 +502,37 @@ class Follow:
|
|
|
time.sleep(1)
|
|
|
Feishu.update_values("follow", "kuaishou", "wW5cyb", "A2:P2", values)
|
|
|
Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
|
|
|
-
|
|
|
- # 翻页
|
|
|
- cls.person_pcursor = r.json()["pcursor"]
|
|
|
- # 视频发布时间 <= 7 天
|
|
|
- if int(time.time()) - int(cls.send_time) <= 604800:
|
|
|
- cls.get_videos_from_person(log_type, uid)
|
|
|
- else:
|
|
|
- cls.person_pcursor = ""
|
|
|
- return
|
|
|
+ cls.get_video_count.append(video_id)
|
|
|
+
|
|
|
+ # 抓取足够多数量的视频
|
|
|
+ if len(cls.get_video_count) >= 2:
|
|
|
+ Common.logger(log_type).info('已抓取{}:{}条视频\n', user_name, len(cls.get_video_count))
|
|
|
+ cls.person_pcursor = ""
|
|
|
+ cls.get_video_count = []
|
|
|
+ return
|
|
|
+ if len(cls.get_video_count) < 2:
|
|
|
+ # 翻页
|
|
|
+ cls.person_pcursor = r.json()["pcursor"]
|
|
|
+ cls.get_user_videos(log_type, uid)
|
|
|
|
|
|
except Exception as e:
|
|
|
- # Feishu.bot(log_type, "follow:get_videos_from_person异常" + format(e))
|
|
|
Common.logger(log_type).error("get_videos_from_person异常:{}\n", e)
|
|
|
|
|
|
# 获取所有关注列表的用户视频
|
|
|
@classmethod
|
|
|
- def get_videos_from_follow(cls, log_type):
|
|
|
+ def get_videos_from_follow(cls, log_type, env):
|
|
|
+ """For each followed user: fetch profile videos, run download/publish with env, then pause 10-30 s."""
|
|
|
try:
|
|
|
- # 已关注的用户列表 uids
|
|
|
- uid_sheet = Feishu.get_values_batch(log_type, "kuaishou", "2OLxLr")
|
|
|
- for i in range(1, len(uid_sheet)):
|
|
|
- uid = uid_sheet[i][0]
|
|
|
- nick = uid_sheet[i][1]
|
|
|
- Common.logger(log_type).info("获取用户:{}主页视频\n", nick)
|
|
|
- cls.get_videos_from_person(log_type, uid)
|
|
|
- time.sleep(random.randint(3, 5))
|
|
|
+ user_list = cls.get_follow_users(log_type)
|
|
|
+ # get_follow_users() can finish without a return value (e.g. its except branch only logs),
|
|
|
+ # so test truthiness: len(None) would raise and be reported below as an exception.
|
|
|
+ if not user_list:
|
|
|
+ Common.logger(log_type).warning('用户ID列表为空\n')
|
|
|
+ else:
|
|
|
+ for k, v in user_list.items():
|
|
|
+ Common.logger(log_type).info('正在获取 {} 主页视频\n', k)
|
|
|
+ cls.get_user_videos(log_type, str(v))
|
|
|
+ cls.run_download_publish(log_type, env)
|
|
|
+ time.sleep(random.randint(10, 30))
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("get_videos_from_follow异常:{}", e)
|
|
|
+ Common.logger(log_type).error('get_videos_from_follow异常:{}\n', e)
|
|
|
|
|
|
# 下载/上传
|
|
|
@classmethod
|
|
@@ -552,23 +558,6 @@ class Follow:
|
|
|
|
|
|
Common.logger(log_type).info("正在判断第{}行,视频:{}", i + 1, download_video_title)
|
|
|
|
|
|
- # Common.logger(log_type).info("download_video_id:{}", download_video_id)
|
|
|
- # Common.logger(log_type).info("download_video_title:{}", download_video_title)
|
|
|
- # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
|
|
|
- # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
|
|
|
- # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
|
|
|
- # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
|
|
|
- # Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
|
|
|
- # Common.logger(log_type).info("download_video_resolution:{}", download_video_resolution)
|
|
|
- # Common.logger(log_type).info("download_video_width:{}", download_video_width)
|
|
|
- # Common.logger(log_type).info("download_video_height:{}", download_video_height)
|
|
|
- # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
|
|
|
- # Common.logger(log_type).info("download_user_name:{}", download_user_name)
|
|
|
- # Common.logger(log_type).info("download_user_id:{}", download_user_id)
|
|
|
- # Common.logger(log_type).info("download_head_url:{}", download_head_url)
|
|
|
- # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
|
|
|
- # Common.logger(log_type).info("download_video_url:{}", download_video_url)
|
|
|
-
|
|
|
# 过滤空行及空标题视频
|
|
|
if download_video_id is None \
|
|
|
or download_video_id == "" \
|
|
@@ -578,15 +567,15 @@ class Follow:
|
|
|
Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
|
|
|
Common.logger(log_type).warning("标题为空或空行,删除成功\n")
|
|
|
return
|
|
|
- # 下载规则
|
|
|
- elif cls.download_rule(
|
|
|
- download_video_duration, download_video_resolution.split("*")[0],
|
|
|
- download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
|
|
|
- download_video_share_cnt) is False:
|
|
|
- # 删除行或列,可选 ROWS、COLUMNS
|
|
|
- Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
|
|
|
- Common.logger(log_type).info("不满足下载规则:{},删除成功\n", download_video_title)
|
|
|
- return
|
|
|
+ # # 下载规则
|
|
|
+ # elif cls.download_rule(
|
|
|
+ # download_video_duration, download_video_resolution.split("*")[0],
|
|
|
+ # download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
|
|
|
+ # download_video_share_cnt) is False:
|
|
|
+ # # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ # Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
|
|
|
+ # Common.logger(log_type).info("不满足下载规则:{},删除成功\n", download_video_title)
|
|
|
+ # return
|
|
|
# 从已下载视频表中去重
|
|
|
elif download_video_id in [j for m in Feishu.get_values_batch(
|
|
|
log_type, "kuaishou", "3cd128") for j in m]:
|
|
@@ -659,8 +648,7 @@ class Follow:
|
|
|
Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
|
|
|
return
|
|
|
except Exception as e:
|
|
|
- # Feishu.bot(log_type, "follow:download_publish异常" + format(e))
|
|
|
- Common.logger(log_type).error("download_publish异常:{}", e)
|
|
|
+ Common.logger(log_type).error("download_publish异常:{}\n", e)
|
|
|
|
|
|
# 执行下载/上传
|
|
|
@classmethod
|
|
@@ -669,15 +657,17 @@ class Follow:
|
|
|
while True:
|
|
|
follow_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
|
|
|
if len(follow_feeds_sheet) == 1:
|
|
|
- Common.logger(log_type).info("下载/上传完成")
|
|
|
+ Common.logger(log_type).info("下载/上传完成\n")
|
|
|
break
|
|
|
else:
|
|
|
cls.download_publish(log_type, env)
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("run_download_publish异常:{}", e)
|
|
|
+ Common.logger(log_type).error("run_download_publish异常:{}\n", e)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- # Follow.get_videos_from_follow("follow")
|
|
|
# Follow.follow_unfollow("follow", "follow", "687090964")
|
|
|
- Follow.get_videos_from_person("follow", "2413743952")
|
|
|
+ # Follow.get_videos_from_person("follow", "2413743952")
|
|
|
+ # Follow.get_follow_users_to_feishu('follow')
|
|
|
+ # Follow.get_follow_users('follow')
|
|
|
+ # NOTE(review): get_videos_from_follow now takes a required env argument; 'dev' is assumed here - confirm the accepted values.
|
|
|
+ Follow.get_videos_from_follow('follow', 'dev')
|