|
@@ -10,6 +10,7 @@ import time
|
|
|
import requests
|
|
|
import urllib3
|
|
|
from main.common import Common
|
|
|
+from main.feishu_lib import Feishu
|
|
|
from main.publish import Publish
|
|
|
|
|
|
proxies = {"http": None, "https": None}
|
|
@@ -71,9 +72,9 @@ class KuaiShou:
|
|
|
"""
|
|
|
if 600 >= int(float(d_duration)) >= 60:
|
|
|
if int(d_width) >= 720 or int(d_height) >= 720:
|
|
|
- if int(d_play_cnt) >= 50000:
|
|
|
- if int(d_like_cnt) >= 50000:
|
|
|
- if int(d_share_cnt) >= 2000:
|
|
|
+ if int(d_play_cnt) >= 5:
|
|
|
+ if int(d_like_cnt) >= 30000:
|
|
|
+ if int(d_share_cnt) >= 1000:
|
|
|
return True
|
|
|
else:
|
|
|
return False
|
|
@@ -87,10 +88,10 @@ class KuaiShou:
|
|
|
@classmethod
|
|
|
def kuaishou_get_recommend(cls):
|
|
|
"""
|
|
|
- 从快手小程序首页推荐获取视频list:
|
|
|
- 1.在 kuaishou_videoid.txt 中去重
|
|
|
- 2.在 kuaishou_feeds.txt 中去重
|
|
|
- 3.添加视频信息到 kuaishou_feeds.txt
|
|
|
+ 1.从快手小程序首页推荐,获取视频列表
|
|
|
+ 2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e 中去重
|
|
|
+ 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7 中去重
|
|
|
+ 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
|
|
|
"""
|
|
|
url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
|
|
|
params = {
|
|
@@ -165,100 +166,129 @@ class KuaiShou:
|
|
|
r = requests.post(url=url, params=params, cookies=cookies, json=json_data, proxies=proxies, verify=False)
|
|
|
response = json.loads(r.content.decode("utf8"))
|
|
|
if "feeds" not in response:
|
|
|
- Common.crawler_log().info("获取快手视频 list 出错:{},休眠 10s".format(response))
|
|
|
+ Common.logger().warning("获取快手视频 list 出错:{},休眠 10s".format(response))
|
|
|
time.sleep(10)
|
|
|
else:
|
|
|
feeds = response["feeds"]
|
|
|
for i in range(len(feeds)):
|
|
|
+ # 视频标题过滤话题及处理特殊字符
|
|
|
+ kuaishou_title = feeds[i]["caption"]
|
|
|
+ title_split1 = kuaishou_title.split(" #")
|
|
|
+ if title_split1[0] != "":
|
|
|
+ title1 = title_split1[0]
|
|
|
+ else:
|
|
|
+ title1 = title_split1[-1]
|
|
|
+
|
|
|
+ title_split2 = title1.split(" #")
|
|
|
+ if title_split2[0] != "":
|
|
|
+ title2 = title_split2[0]
|
|
|
+ else:
|
|
|
+ title2 = title_split2[-1]
|
|
|
+
|
|
|
+ title_split3 = title2.split("@")
|
|
|
+ if title_split3[0] != "":
|
|
|
+ title3 = title_split3[0]
|
|
|
+ else:
|
|
|
+ title3 = title_split3[-1]
|
|
|
+
|
|
|
+ video_title = title3.strip().replace("\n", "") \
|
|
|
+ .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
+ .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
|
|
|
+ .replace("#", "").replace(".", "。").replace("\\", "") \
|
|
|
+ .replace(":", "").replace("*", "").replace("?", "") \
|
|
|
+ .replace("?", "").replace('"', "").replace("<", "") \
|
|
|
+ .replace(">", "").replace("|", "")
|
|
|
+ Common.logger().info("video_title:{}".format(video_title))
|
|
|
+
|
|
|
if "photoId" not in feeds[i]:
|
|
|
photo_id = "0"
|
|
|
- Common.crawler_log().info("photo_id:{}".format(photo_id))
|
|
|
+ Common.logger().info("photo_id:{}".format(photo_id))
|
|
|
else:
|
|
|
photo_id = feeds[i]["photoId"]
|
|
|
- Common.crawler_log().info("photo_id:{}".format(photo_id))
|
|
|
+ Common.logger().info("photo_id:{}".format(photo_id))
|
|
|
|
|
|
if "viewCount" not in feeds[i]:
|
|
|
video_play_cnt = "0"
|
|
|
- Common.crawler_log().info("video_play_cnt:0")
|
|
|
+ Common.logger().info("video_play_cnt:0")
|
|
|
else:
|
|
|
video_play_cnt = feeds[i]["viewCount"]
|
|
|
- Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
+ Common.logger().info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
|
|
|
if "likeCount" not in feeds[i]:
|
|
|
video_like_cnt = "0"
|
|
|
- Common.crawler_log().info("video_like_cnt:0")
|
|
|
+ Common.logger().info("video_like_cnt:0")
|
|
|
else:
|
|
|
video_like_cnt = feeds[i]["likeCount"]
|
|
|
- Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
-
|
|
|
- if "headUrl" not in feeds[i]:
|
|
|
- head_url = "0"
|
|
|
- Common.crawler_log().info("head_url:不存在")
|
|
|
- else:
|
|
|
- head_url = feeds[i]["headUrl"]
|
|
|
- Common.crawler_log().info("head_url:{}".format(head_url))
|
|
|
-
|
|
|
- if len(feeds[i]["coverUrls"]) == 0:
|
|
|
- cover_url = "0"
|
|
|
- Common.crawler_log().info("cover_url:不存在")
|
|
|
- else:
|
|
|
- cover_url = feeds[i]["coverUrls"][0]["url"]
|
|
|
- Common.crawler_log().info("cover_url:{}".format(cover_url))
|
|
|
-
|
|
|
- if len(feeds[i]["mainMvUrls"]) == 0:
|
|
|
- video_url = "0"
|
|
|
- Common.crawler_log().info("video_url:不存在")
|
|
|
- else:
|
|
|
- video_url = feeds[i]["mainMvUrls"][0]["url"]
|
|
|
- Common.crawler_log().info("video_url:{}".format(video_url))
|
|
|
+ Common.logger().info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
|
|
|
if "shareCount" not in feeds[i]:
|
|
|
video_share_cnt = "0"
|
|
|
- Common.crawler_log().info("video_share_cnt:0")
|
|
|
+ Common.logger().info("video_share_cnt:0")
|
|
|
else:
|
|
|
video_share_cnt = feeds[i]["shareCount"]
|
|
|
- Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
-
|
|
|
- if "width" not in feeds[i] or "height"not in feeds[i]:
|
|
|
- video_width = "0"
|
|
|
- video_height = "0"
|
|
|
- video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- Common.crawler_log().info("无分辨率")
|
|
|
- else:
|
|
|
- video_width = feeds[i]["width"]
|
|
|
- video_height = feeds[i]["height"]
|
|
|
- video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- Common.crawler_log().info("video_resolution:{}".format(video_resolution))
|
|
|
+ Common.logger().info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
|
|
|
if "commentCount" not in feeds[i]:
|
|
|
video_comment_cnt = "0"
|
|
|
- Common.crawler_log().info("video_comment_cnt:0")
|
|
|
+ Common.logger().info("video_comment_cnt:0")
|
|
|
else:
|
|
|
video_comment_cnt = feeds[i]["commentCount"]
|
|
|
- Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
+ Common.logger().info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
|
|
|
if "duration" not in feeds[i]:
|
|
|
video_duration = "0"
|
|
|
- Common.crawler_log().info("video_duration:不存在")
|
|
|
+ Common.logger().info("video_duration:不存在")
|
|
|
else:
|
|
|
- video_duration = int(int(feeds[i]["duration"])/1000)
|
|
|
- Common.crawler_log().info("video_duration:{}秒".format(video_duration))
|
|
|
+ video_duration = int(int(feeds[i]["duration"]) / 1000)
|
|
|
+ Common.logger().info("video_duration:{}秒".format(video_duration))
|
|
|
+
|
|
|
+ if "width" not in feeds[i] or "height" not in feeds[i]:
|
|
|
+ video_width = "0"
|
|
|
+ video_height = "0"
|
|
|
+ video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
+ Common.logger().info("无分辨率")
|
|
|
+ else:
|
|
|
+ video_width = feeds[i]["width"]
|
|
|
+ video_height = feeds[i]["height"]
|
|
|
+ video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
+ Common.logger().info("video_resolution:{}".format(video_resolution))
|
|
|
|
|
|
if "timestamp" not in feeds[i]:
|
|
|
video_send_time = "0"
|
|
|
- Common.crawler_log().info("video_send_time:不存在")
|
|
|
+ Common.logger().info("video_send_time:不存在")
|
|
|
else:
|
|
|
video_send_time = feeds[i]["timestamp"]
|
|
|
- Common.crawler_log().info("video_send_time:{}".format(
|
|
|
- time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000))))
|
|
|
+ Common.logger().info("video_send_time:{}".format(
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
|
|
|
|
|
|
- user_name = feeds[i]["userName"].strip().replace("\n", "")\
|
|
|
- .replace("/", "").replace("快手", "").replace(" ", "")\
|
|
|
+ user_name = feeds[i]["userName"].strip().replace("\n", "") \
|
|
|
+ .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
.replace(" ", "").replace("&NBSP", "").replace("\r", "")
|
|
|
- Common.crawler_log().info("user_name:{}".format(user_name))
|
|
|
+ Common.logger().info("user_name:{}".format(user_name))
|
|
|
|
|
|
user_id = feeds[i]["userId"]
|
|
|
- Common.crawler_log().info("user_id:{}".format(user_id))
|
|
|
+ Common.logger().info("user_id:{}".format(user_id))
|
|
|
+
|
|
|
+ if "headUrl" not in feeds[i]:
|
|
|
+ head_url = "0"
|
|
|
+ Common.logger().info("head_url:不存在")
|
|
|
+ else:
|
|
|
+ head_url = feeds[i]["headUrl"]
|
|
|
+ Common.logger().info("head_url:{}".format(head_url))
|
|
|
+
|
|
|
+ if len(feeds[i]["coverUrls"]) == 0:
|
|
|
+ cover_url = "0"
|
|
|
+ Common.logger().info("cover_url:不存在")
|
|
|
+ else:
|
|
|
+ cover_url = feeds[i]["coverUrls"][0]["url"]
|
|
|
+ Common.logger().info("cover_url:{}".format(cover_url))
|
|
|
+
|
|
|
+ if len(feeds[i]["mainMvUrls"]) == 0:
|
|
|
+ video_url = "0"
|
|
|
+ Common.logger().info("video_url:不存在")
|
|
|
+ else:
|
|
|
+ video_url = feeds[i]["mainMvUrls"][0]["url"]
|
|
|
+ Common.logger().info("video_url:{}".format(video_url))
|
|
|
|
|
|
# 视频标题过滤话题及处理特殊字符
|
|
|
kuaishou_title = feeds[i]["caption"]
|
|
@@ -288,173 +318,143 @@ class KuaiShou:
|
|
|
.replace("?", "").replace('"', "").replace("<", "")\
|
|
|
.replace(">", "").replace("|", "")
|
|
|
|
|
|
- Common.crawler_log().info("video_title:{}".format(video_title))
|
|
|
-
|
|
|
- # 从 kuaishou_videoid.txt 中去重
|
|
|
- photo_ids = Common.read_txt("kuaishou_videoid.txt")
|
|
|
- if photo_id in [p_id.strip() for p_id in photo_ids]:
|
|
|
- Common.crawler_log().info("该视频已下载:{}".format(video_title))
|
|
|
- pass
|
|
|
- else:
|
|
|
- Common.crawler_log().info("该视频未下载:{}".format(video_title))
|
|
|
-
|
|
|
- # 从 kuaishou_feeds.txt 中去重
|
|
|
- contents = Common.read_txt("kuaishou_feeds.txt")
|
|
|
- # kuaishou_feeds.txt 为空时,直接保存
|
|
|
- if len(contents) == 0 and head_url != "0" \
|
|
|
- and cover_url != "0" and video_url != "0" \
|
|
|
- and video_duration != "0" and photo_id != "0":
|
|
|
- # 判断敏感词
|
|
|
- if any(word if word in kuaishou_title else False
|
|
|
+ Common.logger().info("video_title:{}".format(video_title))
|
|
|
+
|
|
|
+ # 过滤无效视频
|
|
|
+ if photo_id == "0" \
|
|
|
+ or head_url == "0" \
|
|
|
+ or cover_url == "0"\
|
|
|
+ or video_url == "0"\
|
|
|
+ or video_duration == "0"\
|
|
|
+ or video_send_time == "0"\
|
|
|
+ or user_name == ""\
|
|
|
+ or video_title == "":
|
|
|
+ Common.logger().info("无效视频")
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e
|
|
|
+ elif photo_id in [j for i in Feishu.get_values_batch("kCSk2e") for j in i]:
|
|
|
+ Common.logger().info("该视频已下载:{}", video_title)
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
|
|
|
+ elif photo_id in [j for i in Feishu.get_values_batch("YWeCh7") for j in i]:
|
|
|
+ Common.logger().info("该视频已在feeds中:{}", video_title)
|
|
|
+ # 判断敏感词
|
|
|
+ elif any(word if word in kuaishou_title else False
|
|
|
for word in cls.kuaishou_sensitive_words()) is True:
|
|
|
- Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
|
|
|
- else:
|
|
|
- basic_time = int(time.time())
|
|
|
- Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
|
|
|
- with open(r"./txt/kuaishou_feeds.txt", "a", encoding="UTF-8") as f_a:
|
|
|
- f_a.write(str(basic_time) + " + " +
|
|
|
- str(photo_id) + " + " +
|
|
|
- str(video_play_cnt) + " + " +
|
|
|
- str(video_title) + " + " +
|
|
|
- str(video_duration) + " + " +
|
|
|
- str(video_comment_cnt) + " + " +
|
|
|
- str(video_like_cnt) + " + " +
|
|
|
- str(video_share_cnt) + " + " +
|
|
|
- str(video_resolution) + " + " +
|
|
|
- str(video_send_time) + " + " +
|
|
|
- str(user_name) + " + " +
|
|
|
- str(head_url) + " + " +
|
|
|
- str(cover_url) + " + " +
|
|
|
- str(video_url) + " + " +
|
|
|
- str(user_id) + " + " +
|
|
|
- str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
|
|
|
- else:
|
|
|
- if photo_id in [content.split(" + ")[1] for content in contents]:
|
|
|
- Common.crawler_log().info("该视频已在 kuaishou_feeds.txt 中:{}".format(video_title))
|
|
|
- elif head_url == "0" or cover_url == "0" \
|
|
|
- or video_url == "0" or video_duration == "0" or photo_id == "0":
|
|
|
- Common.crawler_log().info("视频封面/播放地址/播放时长/用户头像不存在")
|
|
|
- else:
|
|
|
- # 判断敏感词
|
|
|
- if any(word if word in kuaishou_title else False
|
|
|
- for word in cls.kuaishou_sensitive_words()) is True:
|
|
|
- Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
|
|
|
- else:
|
|
|
- basic_time = int(time.time())
|
|
|
- Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
|
|
|
- with open(r"./txt/kuaishou_feeds.txt", "a", encoding="UTF-8") as f_a:
|
|
|
- f_a.write(str(basic_time) + " + " +
|
|
|
- str(photo_id) + " + " +
|
|
|
- str(video_play_cnt) + " + " +
|
|
|
- str(video_title) + " + " +
|
|
|
- str(video_duration) + " + " +
|
|
|
- str(video_comment_cnt) + " + " +
|
|
|
- str(video_like_cnt) + " + " +
|
|
|
- str(video_share_cnt) + " + " +
|
|
|
- str(video_resolution) + " + " +
|
|
|
- str(video_send_time) + " + " +
|
|
|
- str(user_name) + " + " +
|
|
|
- str(head_url) + " + " +
|
|
|
- str(cover_url) + " + " +
|
|
|
- str(video_url) + " + " +
|
|
|
- str(user_id) + " + " +
|
|
|
- str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
|
|
|
+ Common.logger().info("视频已中敏感词:{}".format(kuaishou_title))
|
|
|
+ else:
|
|
|
+ Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
|
|
|
+ # feeds工作表,插入首行
|
|
|
+ Feishu.insert_columns("YWeCh7")
|
|
|
+
|
|
|
+ # 获取当前时间
|
|
|
+ get_feeds_time = int(time.time())
|
|
|
+ # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
|
|
|
+ Feishu.update_values("YWeCh7",
|
|
|
+ a1=str(get_feeds_time),
|
|
|
+ b1=str(photo_id),
|
|
|
+ c1=str(video_play_cnt),
|
|
|
+ d1=str(video_title),
|
|
|
+ e1=str(video_duration),
|
|
|
+ f1=str(video_comment_cnt),
|
|
|
+ g1=str(video_like_cnt),
|
|
|
+ h1=str(video_share_cnt),
|
|
|
+ i1=str(video_resolution),
|
|
|
+ j1=str(video_send_time),
|
|
|
+ k1=str(user_name),
|
|
|
+ l1=str(head_url),
|
|
|
+ m1=str(cover_url),
|
|
|
+ n1=str(video_url),
|
|
|
+ o1=str("wxo_b07ba02ad4340205d89b47c76030bb090977"))
|
|
|
except Exception as e:
|
|
|
- Common.crawler_log().error("获取视频 list 异常:{}".format(e))
|
|
|
+ Common.logger().error("获取视频 list 异常:{}".format(e))
|
|
|
|
|
|
@classmethod
def kuaishou_download_play_video(cls, env):
    """
    Download and publish the videos queued in the Feishu feeds sheet.

    1. Read video info from
       https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
    2. Download and upload the videos that satisfy the download rule.

    Every processed row is removed from the feeds sheet, whether the video
    was published, rejected by the download rule, or could not be parsed.

    :param env: publish environment, forwarded to Publish.upload_and_publish.
        Test environment: env == dev
        Production environment: env == prod
    """
    feeds_sheet = "YWeCh7"
    video_id_sheet = "kCSk2e"

    rows = Feishu.get_values_batch(feeds_sheet)
    # Index 0 is never read as a video (presumably a header row - confirm
    # against the sheet), so one row or fewer means nothing is queued.
    if not rows or len(rows) <= 1:
        return

    # Always work on the first data row and delete it afterwards. The
    # previous version walked forward by index while deleting rows, which
    # skipped every other row, overran the list on its last iteration, and
    # then re-entered itself with a hard-coded "prod" environment.
    # Bounding the loop by the number of rows queued at start keeps a
    # failing delete from looping forever.
    pending = len(rows) - 1
    for attempt in range(pending):
        if attempt:
            # One read per row instead of one read per field.
            rows = Feishu.get_values_batch(feeds_sheet)
            if not rows or len(rows) <= 1:
                break
        try:
            row = rows[1]
            download_photo_id = row[1]
            download_video_play_cnt = row[2]
            download_video_title = row[3]
            download_video_duration = row[4]
            download_video_comment_cnt = row[5]
            download_video_like_cnt = row[6]
            download_video_share_cnt = row[7]
            download_video_resolution = row[8]
            # Resolution is stored as "<width>*<height>".
            download_video_width = download_video_resolution.split("*")[0]
            download_video_height = download_video_resolution.split("*")[-1]
            download_video_send_time = row[9]
            download_user_name = row[10]
            download_head_url = row[11]
            download_cover_url = row[12]
            download_video_url = row[13]
            download_video_session = row[14]

            # Download rule
            if cls.kuaishou_download_rule(download_video_duration,
                                          download_video_width,
                                          download_video_height,
                                          download_video_play_cnt,
                                          download_video_like_cnt,
                                          download_video_share_cnt) is True:
                Common.logger().info("开始下载快手视频:{}".format(download_video_title))
                # Download the cover
                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
                # Download the video
                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
                # Save the video info to "./videos/{download_video_title}/info.txt"
                with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
                    f_a.write(str(download_photo_id) + "\n" +
                              str(download_video_title) + "\n" +
                              str(download_video_duration) + "\n" +
                              str(download_video_play_cnt) + "\n" +
                              str(download_video_comment_cnt) + "\n" +
                              str(download_video_like_cnt) + "\n" +
                              str(download_video_share_cnt) + "\n" +
                              str(download_video_resolution) + "\n" +
                              str(download_video_send_time) + "\n" +
                              str(download_user_name) + "\n" +
                              str(download_head_url) + "\n" +
                              str(download_video_url) + "\n" +
                              str(download_cover_url) + "\n" +
                              str(download_video_session))

                # Record the video ID so this run's download total can be counted
                cls.download_video_list.append(download_photo_id)

                # Upload the video
                Common.logger().info("开始上传视频:{}".format(download_video_title))
                Publish.upload_and_publish(env, "play")

                # Save the video ID to the cloud sheet:
                # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e
                Common.logger().info("保存视频ID至云文档:{}".format(download_video_title))
                # Video-ID sheet: insert a new first row
                Feishu.insert_columns(video_id_sheet)
                # Video-ID sheet: write the ID into that first row
                Feishu.update_values(video_id_sheet, download_photo_id, "", "", "",
                                     "", "", "", "", "", "", "", "", "", "", "")

                Common.logger().info("从云文档删除该视频信息:{}".format(download_video_title))
            else:
                Common.logger().info("该视频不满足下载规则,删除在云文档中的信息:{}".format(download_video_title))

            # Remove the processed row from the feeds sheet (list index 1 is
            # sheet row 2); the dimension may be ROWS or COLUMNS:
            # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
            Feishu.dimension_range(feeds_sheet, "ROWS", 2, 2)
        except Exception as e:
            # A malformed row must not block the rows behind it: log the
            # cause and drop the row.
            Common.logger().error("视频 info 异常,删除该视频信息:{}".format(e))
            Feishu.dimension_range(feeds_sheet, "ROWS", 2, 2)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|