|
@@ -317,197 +317,198 @@ class Follow:
|
|
|
urllib3.disable_warnings()
|
|
|
r = requests.post(url=url, headers=headers, params=params, cookies=cookies,
|
|
|
json=json_text, proxies=proxies, verify=False)
|
|
|
- # Common.logger(log_type).info("response:{}", r.text)
|
|
|
+ # Common.logger(log_type).info("response:{}\n\n", r.text)
|
|
|
if "feeds" not in r.json():
|
|
|
Common.logger(log_type).warning("response:{}", r.text)
|
|
|
- feeds = r.json()["feeds"]
|
|
|
- if len(feeds) == 0:
|
|
|
- Common.logger(log_type).warning("用户主页无视频\n")
|
|
|
- for i in range(len(feeds)):
|
|
|
- # 视频标题过滤话题及处理特殊字符
|
|
|
- kuaishou_title = feeds[i]["caption"]
|
|
|
- title_split1 = kuaishou_title.split(" #")
|
|
|
- if title_split1[0] != "":
|
|
|
- title1 = title_split1[0]
|
|
|
- else:
|
|
|
- title1 = title_split1[-1]
|
|
|
+ else:
|
|
|
+ feeds = r.json()["feeds"]
|
|
|
+ if len(feeds) == 0:
|
|
|
+ Common.logger(log_type).warning("用户主页无视频\n")
|
|
|
+ for i in range(len(feeds)):
|
|
|
+ # 视频标题过滤话题及处理特殊字符
|
|
|
+ kuaishou_title = feeds[i]["caption"]
|
|
|
+ title_split1 = kuaishou_title.split(" #")
|
|
|
+ if title_split1[0] != "":
|
|
|
+ title1 = title_split1[0]
|
|
|
+ else:
|
|
|
+ title1 = title_split1[-1]
|
|
|
|
|
|
- title_split2 = title1.split(" #")
|
|
|
- if title_split2[0] != "":
|
|
|
- title2 = title_split2[0]
|
|
|
- else:
|
|
|
- title2 = title_split2[-1]
|
|
|
+ title_split2 = title1.split(" #")
|
|
|
+ if title_split2[0] != "":
|
|
|
+ title2 = title_split2[0]
|
|
|
+ else:
|
|
|
+ title2 = title_split2[-1]
|
|
|
|
|
|
- title_split3 = title2.split("@")
|
|
|
- if title_split3[0] != "":
|
|
|
- title3 = title_split3[0]
|
|
|
- else:
|
|
|
- title3 = title_split3[-1]
|
|
|
-
|
|
|
- video_title = title3.strip().replace("\n", "") \
|
|
|
- .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
- .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
|
|
|
- .replace("#", "").replace(".", "。").replace("\\", "") \
|
|
|
- .replace(":", "").replace("*", "").replace("?", "") \
|
|
|
- .replace("?", "").replace('"', "").replace("<", "") \
|
|
|
- .replace(">", "").replace("|", "").replace("@", "")
|
|
|
-
|
|
|
- if "photoId" not in feeds[i]:
|
|
|
- video_id = "0"
|
|
|
- else:
|
|
|
- video_id = feeds[i]["photoId"]
|
|
|
+ title_split3 = title2.split("@")
|
|
|
+ if title_split3[0] != "":
|
|
|
+ title3 = title_split3[0]
|
|
|
+ else:
|
|
|
+ title3 = title_split3[-1]
|
|
|
|
|
|
- if "viewCount" not in feeds[i]:
|
|
|
- video_play_cnt = "0"
|
|
|
- else:
|
|
|
- video_play_cnt = feeds[i]["viewCount"]
|
|
|
+ video_title = title3.strip().replace("\n", "") \
|
|
|
+ .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
+ .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
|
|
|
+ .replace("#", "").replace(".", "。").replace("\\", "") \
|
|
|
+ .replace(":", "").replace("*", "").replace("?", "") \
|
|
|
+ .replace("?", "").replace('"', "").replace("<", "") \
|
|
|
+ .replace(">", "").replace("|", "").replace("@", "")
|
|
|
+
|
|
|
+ if "photoId" not in feeds[i]:
|
|
|
+ video_id = "0"
|
|
|
+ else:
|
|
|
+ video_id = feeds[i]["photoId"]
|
|
|
|
|
|
- if "likeCount" not in feeds[i]:
|
|
|
- video_like_cnt = "0"
|
|
|
- else:
|
|
|
- video_like_cnt = feeds[i]["likeCount"]
|
|
|
+ if "viewCount" not in feeds[i]:
|
|
|
+ video_play_cnt = "0"
|
|
|
+ else:
|
|
|
+ video_play_cnt = feeds[i]["viewCount"]
|
|
|
|
|
|
- if "shareCount" not in feeds[i]:
|
|
|
- video_share_cnt = "0"
|
|
|
- else:
|
|
|
- video_share_cnt = feeds[i]["shareCount"]
|
|
|
+ if "likeCount" not in feeds[i]:
|
|
|
+ video_like_cnt = "0"
|
|
|
+ else:
|
|
|
+ video_like_cnt = feeds[i]["likeCount"]
|
|
|
|
|
|
- if "commentCount" not in feeds[i]:
|
|
|
- video_comment_cnt = "0"
|
|
|
- else:
|
|
|
- video_comment_cnt = feeds[i]["commentCount"]
|
|
|
+ if "shareCount" not in feeds[i]:
|
|
|
+ video_share_cnt = "0"
|
|
|
+ else:
|
|
|
+ video_share_cnt = feeds[i]["shareCount"]
|
|
|
|
|
|
- if "duration" not in feeds[i]:
|
|
|
- video_duration = "0"
|
|
|
- else:
|
|
|
- video_duration = int(int(feeds[i]["duration"]) / 1000)
|
|
|
+ if "commentCount" not in feeds[i]:
|
|
|
+ video_comment_cnt = "0"
|
|
|
+ else:
|
|
|
+ video_comment_cnt = feeds[i]["commentCount"]
|
|
|
|
|
|
- if "width" not in feeds[i] or "height" not in feeds[i]:
|
|
|
- video_width = "0"
|
|
|
- video_height = "0"
|
|
|
- else:
|
|
|
- video_width = feeds[i]["width"]
|
|
|
- video_height = feeds[i]["height"]
|
|
|
+ if "duration" not in feeds[i]:
|
|
|
+ video_duration = "0"
|
|
|
+ else:
|
|
|
+ video_duration = int(int(feeds[i]["duration"]) / 1000)
|
|
|
|
|
|
- if "timestamp" not in feeds[i]:
|
|
|
- video_send_time = "0"
|
|
|
- else:
|
|
|
- video_send_time = feeds[i]["timestamp"]
|
|
|
- cls.send_time = int(int(video_send_time) / 1000)
|
|
|
+ if "width" not in feeds[i] or "height" not in feeds[i]:
|
|
|
+ video_width = "0"
|
|
|
+ video_height = "0"
|
|
|
+ else:
|
|
|
+ video_width = feeds[i]["width"]
|
|
|
+ video_height = feeds[i]["height"]
|
|
|
|
|
|
- if "userName" not in feeds[i]:
|
|
|
- user_name = "0"
|
|
|
- else:
|
|
|
- user_name = feeds[i]["userName"].strip().replace("\n", "") \
|
|
|
- .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
- .replace(" ", "").replace("&NBSP", "").replace("\r", "")
|
|
|
+ if "timestamp" not in feeds[i]:
|
|
|
+ video_send_time = "0"
|
|
|
+ else:
|
|
|
+ video_send_time = feeds[i]["timestamp"]
|
|
|
+ cls.send_time = int(int(video_send_time) / 1000)
|
|
|
|
|
|
- if "userId" not in feeds[i]:
|
|
|
- user_id = "0"
|
|
|
- else:
|
|
|
- user_id = feeds[i]["userId"]
|
|
|
+ if "userName" not in feeds[i]:
|
|
|
+ user_name = "0"
|
|
|
+ else:
|
|
|
+ user_name = feeds[i]["userName"].strip().replace("\n", "") \
|
|
|
+ .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
+ .replace(" ", "").replace("&NBSP", "").replace("\r", "")
|
|
|
|
|
|
- if "headUrl" not in feeds[i]:
|
|
|
- head_url = "0"
|
|
|
- else:
|
|
|
- head_url = feeds[i]["headUrl"]
|
|
|
-
|
|
|
- if "webpCoverUrls" in feeds[i]:
|
|
|
- cover_url = feeds[i]["webpCoverUrls"][-1]["url"]
|
|
|
- elif "coverUrls" not in feeds[i]:
|
|
|
- cover_url = "0"
|
|
|
- elif len(feeds[i]["coverUrls"]) == 0:
|
|
|
- cover_url = "0"
|
|
|
- else:
|
|
|
- cover_url = feeds[i]["coverUrls"][0]["url"]
|
|
|
+ if "userId" not in feeds[i]:
|
|
|
+ user_id = "0"
|
|
|
+ else:
|
|
|
+ user_id = feeds[i]["userId"]
|
|
|
|
|
|
- if "mainMvUrls" not in feeds[i]:
|
|
|
- video_url = "0"
|
|
|
- elif len(feeds[i]["mainMvUrls"]) == 0:
|
|
|
- video_url = "0"
|
|
|
- else:
|
|
|
- video_url = feeds[i]["mainMvUrls"][0]["url"]
|
|
|
-
|
|
|
- Common.logger(log_type).info("video_title:{}".format(video_title))
|
|
|
- Common.logger(log_type).info("user_name:{}".format(user_name))
|
|
|
- Common.logger(log_type).info("video_id:{}".format(video_id))
|
|
|
- Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
- Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
- Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
- # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
- Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
|
|
|
- # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
|
|
|
- Common.logger(log_type).info("video_send_time:{}".format(
|
|
|
- time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
|
|
|
- # Common.logger(log_type).info("user_id:{}".format(user_id))
|
|
|
- # Common.logger(log_type).info("head_url:{}".format(head_url))
|
|
|
- # Common.logger(log_type).info("cover_url:{}".format(cover_url))
|
|
|
- Common.logger(log_type).info("video_url:{}".format(video_url))
|
|
|
-
|
|
|
- # 过滤无效视频
|
|
|
- if video_id == "0" \
|
|
|
- or head_url == "0" \
|
|
|
- or cover_url == "0" \
|
|
|
- or video_url == "0" \
|
|
|
- or video_duration == "0" \
|
|
|
- or video_send_time == "0" \
|
|
|
- or user_name == "0" \
|
|
|
- or user_id == "0" \
|
|
|
- or video_title == "":
|
|
|
- Common.logger(log_type).info("无效视频\n")
|
|
|
+ if "headUrl" not in feeds[i]:
|
|
|
+ head_url = "0"
|
|
|
+ else:
|
|
|
+ head_url = feeds[i]["headUrl"]
|
|
|
+
|
|
|
+ if "webpCoverUrls" in feeds[i]:
|
|
|
+ cover_url = feeds[i]["webpCoverUrls"][-1]["url"]
|
|
|
+ elif "coverUrls" not in feeds[i]:
|
|
|
+ cover_url = "0"
|
|
|
+ elif len(feeds[i]["coverUrls"]) == 0:
|
|
|
+ cover_url = "0"
|
|
|
+ else:
|
|
|
+ cover_url = feeds[i]["coverUrls"][0]["url"]
|
|
|
+
|
|
|
+ if "mainMvUrls" not in feeds[i]:
|
|
|
+ video_url = "0"
|
|
|
+ elif len(feeds[i]["mainMvUrls"]) == 0:
|
|
|
+ video_url = "0"
|
|
|
+ else:
|
|
|
+ video_url = feeds[i]["mainMvUrls"][0]["url"]
|
|
|
+
|
|
|
+ Common.logger(log_type).info("video_title:{}".format(video_title))
|
|
|
+ Common.logger(log_type).info("user_name:{}".format(user_name))
|
|
|
+ Common.logger(log_type).info("video_id:{}".format(video_id))
|
|
|
+ Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
+ Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
+ Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
+ # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
+ Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
|
|
|
+ # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
|
|
|
+ Common.logger(log_type).info("video_send_time:{}".format(
|
|
|
+ time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
|
|
|
+ # Common.logger(log_type).info("user_id:{}".format(user_id))
|
|
|
+ # Common.logger(log_type).info("head_url:{}".format(head_url))
|
|
|
+ # Common.logger(log_type).info("cover_url:{}".format(cover_url))
|
|
|
+ Common.logger(log_type).info("video_url:{}".format(video_url))
|
|
|
+
|
|
|
+ # 过滤无效视频
|
|
|
+ if video_id == "0" \
|
|
|
+ or head_url == "0" \
|
|
|
+ or cover_url == "0" \
|
|
|
+ or video_url == "0" \
|
|
|
+ or video_duration == "0" \
|
|
|
+ or video_send_time == "0" \
|
|
|
+ or user_name == "0" \
|
|
|
+ or user_id == "0" \
|
|
|
+ or video_title == "":
|
|
|
+ Common.logger(log_type).info("无效视频\n")
|
|
|
+ # 视频发布时间 <= 7 天
|
|
|
+ elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
|
|
|
+ Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
|
|
|
+ "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
|
|
|
+ cls.person_pcursor = ""
|
|
|
+ break
|
|
|
+ # 判断敏感词
|
|
|
+ elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
|
|
|
+ Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
|
|
|
+ elif video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3cd128") for j in m]:
|
|
|
+ Common.logger(log_type).info("该视频已下载:{}\n", video_title)
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=wW5cyb
|
|
|
+ elif video_id in [j for n in Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb") for j in n]:
|
|
|
+ Common.logger(log_type).info("该视频已在feeds中:{}\n", video_title)
|
|
|
+ else:
|
|
|
+ Feishu.insert_columns("follow", "kuaishou", "wW5cyb", "ROWS", 1, 2)
|
|
|
+ # 获取当前时间
|
|
|
+ get_feeds_time = int(time.time())
|
|
|
+ # 工作表中写入数据
|
|
|
+ values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
|
|
|
+ "用户主页",
|
|
|
+ video_id,
|
|
|
+ video_title,
|
|
|
+ video_play_cnt,
|
|
|
+ video_comment_cnt,
|
|
|
+ video_like_cnt,
|
|
|
+ video_share_cnt,
|
|
|
+ video_duration,
|
|
|
+ str(video_width) + "*" + str(video_height),
|
|
|
+ time.strftime(
|
|
|
+ "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
|
|
|
+ user_name,
|
|
|
+ user_id,
|
|
|
+ head_url,
|
|
|
+ cover_url,
|
|
|
+ video_url]]
|
|
|
+ # 等待 1s,防止操作云文档太频繁,导致报错
|
|
|
+ time.sleep(1)
|
|
|
+ Feishu.update_values("follow", "kuaishou", "wW5cyb", "A2:P2", values)
|
|
|
+ Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
|
|
|
+
|
|
|
+ # 翻页
|
|
|
+ cls.person_pcursor = r.json()["pcursor"]
|
|
|
# 视频发布时间 <= 7 天
|
|
|
- elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
|
|
|
- Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
|
|
|
- "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
|
|
|
- cls.person_pcursor = ""
|
|
|
- break
|
|
|
- # 判断敏感词
|
|
|
- elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
|
|
|
- Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
|
|
|
- # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
|
|
|
- elif video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3cd128") for j in m]:
|
|
|
- Common.logger(log_type).info("该视频已下载:{}\n", video_title)
|
|
|
- # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=wW5cyb
|
|
|
- elif video_id in [j for n in Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb") for j in n]:
|
|
|
- Common.logger(log_type).info("该视频已在feeds中:{}\n", video_title)
|
|
|
+ if int(time.time()) - int(cls.send_time) <= 604800:
|
|
|
+ cls.get_videos_from_person(log_type, uid)
|
|
|
else:
|
|
|
- Feishu.insert_columns("follow", "kuaishou", "wW5cyb", "ROWS", 1, 2)
|
|
|
- # 获取当前时间
|
|
|
- get_feeds_time = int(time.time())
|
|
|
- # 工作表中写入数据
|
|
|
- values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
|
|
|
- "用户主页",
|
|
|
- video_id,
|
|
|
- video_title,
|
|
|
- video_play_cnt,
|
|
|
- video_comment_cnt,
|
|
|
- video_like_cnt,
|
|
|
- video_share_cnt,
|
|
|
- video_duration,
|
|
|
- str(video_width) + "*" + str(video_height),
|
|
|
- time.strftime(
|
|
|
- "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
|
|
|
- user_name,
|
|
|
- user_id,
|
|
|
- head_url,
|
|
|
- cover_url,
|
|
|
- video_url]]
|
|
|
- # 等待 1s,防止操作云文档太频繁,导致报错
|
|
|
- time.sleep(1)
|
|
|
- Feishu.update_values("follow", "kuaishou", "wW5cyb", "A2:P2", values)
|
|
|
- Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
|
|
|
-
|
|
|
- # 翻页
|
|
|
- cls.person_pcursor = r.json()["pcursor"]
|
|
|
- # 视频发布时间 <= 7 天
|
|
|
- if int(time.time()) - int(cls.send_time) <= 604800:
|
|
|
- cls.get_videos_from_person(log_type, uid)
|
|
|
- else:
|
|
|
- cls.person_pcursor = ""
|
|
|
- return
|
|
|
+ cls.person_pcursor = ""
|
|
|
+ return
|
|
|
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("获取个人主页视频异常:{}\n", e)
|
|
|
+ Common.logger(log_type).error("get_videos_from_person异常:{}\n", e)
|
|
|
|
|
|
# 获取所有关注列表的用户视频
|
|
|
@classmethod
|
|
@@ -522,7 +523,7 @@ class Follow:
|
|
|
cls.get_videos_from_person(log_type, uid)
|
|
|
time.sleep(random.randint(3, 5))
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("获取用户主页视频异常:{}", e)
|
|
|
+ Common.logger(log_type).error("get_videos_from_follow异常:{}", e)
|
|
|
|
|
|
# 下载/上传
|
|
|
@classmethod
|
|
@@ -654,7 +655,7 @@ class Follow:
|
|
|
Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
|
|
|
return
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("下载/上传异常:{}", e)
|
|
|
+ Common.logger(log_type).error("download_publish异常:{}", e)
|
|
|
|
|
|
# 执行下载/上传
|
|
|
@classmethod
|
|
@@ -668,7 +669,7 @@ class Follow:
|
|
|
else:
|
|
|
cls.download_publish(log_type, env)
|
|
|
except Exception as e:
|
|
|
- Common.logger(log_type).error("执行下载/上传异常:{}", e)
|
|
|
+ Common.logger(log_type).error("run_download_publish异常:{}", e)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|