|
@@ -20,6 +20,17 @@ proxies = {"http": None, "https": None}
|
|
|
|
|
|
|
|
|
class BSZF:
|
|
|
+ # 已下载视频列表
|
|
|
+ download_video_list = []
|
|
|
+
|
|
|
+ # 过滤关键字
|
|
|
+ @classmethod
|
|
|
+ def sensitive_words(cls):
|
|
|
+ sensitive_words = [
|
|
|
+ "早上好",
|
|
|
+ "晚上好",
|
|
|
+ ]
|
|
|
+ return sensitive_words
|
|
|
|
|
|
@classmethod
|
|
|
def get_recommend(cls):
|
|
@@ -228,49 +239,61 @@ class BSZF:
|
|
|
download_video_url = video.strip().split(" + ")[13]
|
|
|
download_video_session = video.strip().split(" + ")[-1]
|
|
|
|
|
|
- Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
|
|
|
- # 下载封面
|
|
|
- Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
|
|
|
- # 下载视频
|
|
|
- Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
|
|
|
- # 保存视频信息至 "./videos/{download_video_title}/info.txt"
|
|
|
- with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
|
|
|
- f_a.write(str(download_video_id) + "\n" +
|
|
|
- str(download_video_title) + "\n" +
|
|
|
- str(download_video_duration) + "\n" +
|
|
|
- str(download_video_play_cnt) + "\n" +
|
|
|
- str(download_video_comment_cnt) + "\n" +
|
|
|
- str(download_video_like_cnt) + "\n" +
|
|
|
- str(download_video_share_cnt) + "\n" +
|
|
|
- str(download_video_resolution) + "\n" +
|
|
|
- str(download_video_send_time) + "\n" +
|
|
|
- str(download_user_name) + "\n" +
|
|
|
- str(download_head_url) + "\n" +
|
|
|
- str(download_video_url) + "\n" +
|
|
|
- str(download_cover_url) + "\n" +
|
|
|
- str(download_video_session))
|
|
|
-
|
|
|
- # 上传视频
|
|
|
- if env == "dev":
|
|
|
- Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
|
|
|
- Publish.upload_and_publish("dev", "play")
|
|
|
- elif env == "prod":
|
|
|
- Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
|
|
|
- Publish.upload_and_publish("prod", "play")
|
|
|
+ if any(word if word in download_video_title else False for word in cls.sensitive_words()) is True:
|
|
|
+ Common.crawler_log().info("视频已中敏感词,删除该视频信息:{}".format(download_video_title))
|
|
|
+ # 删除该视频在benshanzhufu_feeds.txt中的信息
|
|
|
+ with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
|
|
|
+ lines = f_r.readlines()
|
|
|
+ with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
+ for line in lines:
|
|
|
+ if download_video_id in line.split(" + ")[1]:
|
|
|
+ continue
|
|
|
+ f_w.write(line)
|
|
|
+ else:
|
|
|
+ Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
|
|
|
+ # 下载封面
|
|
|
+ Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
|
|
|
+ # 下载视频
|
|
|
+ Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
|
|
|
+ # 保存视频信息至 benshanzhufu_videoid.txt
|
|
|
+ with open(r"./txt/benshanzhufu_videoid.txt", "a", encoding="UTF-8") as fa:
|
|
|
+ fa.write(download_video_id + "\n")
|
|
|
+ # 添加视频 ID 到 list
|
|
|
+ cls.download_video_list.append(download_video_id)
|
|
|
+ # 保存视频信息至 "./videos/{download_video_title}/info.txt"
|
|
|
+ with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
|
|
|
+ f_a.write(str(download_video_id) + "\n" +
|
|
|
+ str(download_video_title) + "\n" +
|
|
|
+ str(download_video_duration) + "\n" +
|
|
|
+ str(download_video_play_cnt) + "\n" +
|
|
|
+ str(download_video_comment_cnt) + "\n" +
|
|
|
+ str(download_video_like_cnt) + "\n" +
|
|
|
+ str(download_video_share_cnt) + "\n" +
|
|
|
+ str(download_video_resolution) + "\n" +
|
|
|
+ str(download_video_send_time) + "\n" +
|
|
|
+ str(download_user_name) + "\n" +
|
|
|
+ str(download_head_url) + "\n" +
|
|
|
+ str(download_video_url) + "\n" +
|
|
|
+ str(download_cover_url) + "\n" +
|
|
|
+ str(download_video_session))
|
|
|
|
|
|
- # 保存视频信息至 benshanzhufu_videoid.txt
|
|
|
- with open(r"./txt/benshanzhufu_videoid.txt", "a", encoding="UTF-8") as fa:
|
|
|
- fa.write(download_video_id + "\n")
|
|
|
+ # 上传视频
|
|
|
+ if env == "dev":
|
|
|
+ Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
|
|
|
+ Publish.upload_and_publish("dev", "play")
|
|
|
+ elif env == "prod":
|
|
|
+ Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
|
|
|
+ Publish.upload_and_publish("prod", "play")
|
|
|
|
|
|
- # 删除该视频在benshanzhufu_feeds.txt中的信息
|
|
|
- Common.crawler_log().info("删除该视频在benshanzhufu_feeds.txt中的信息:{}".format(download_video_title))
|
|
|
- with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
|
|
|
- lines = f_r.readlines()
|
|
|
- with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
- for line in lines:
|
|
|
- if download_video_id in line.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w.write(line)
|
|
|
+ # 删除该视频在benshanzhufu_feeds.txt中的信息
|
|
|
+ Common.crawler_log().info("删除该视频在benshanzhufu_feeds.txt中的信息:{}".format(download_video_title))
|
|
|
+ with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
|
|
|
+ lines = f_r.readlines()
|
|
|
+ with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
+ for line in lines:
|
|
|
+ if download_video_id in line.split(" + ")[1]:
|
|
|
+ continue
|
|
|
+ f_w.write(line)
|
|
|
except Exception as e:
|
|
|
# 删除该视频在 recommend.txt中的信息
|
|
|
Common.crawler_log().error("该视频信息异常,删除在benshanzhufu_feeds.txt中的信息:{}".format(e))
|