소스 검색

update download cnt

wangkun 3 년 전
부모
커밋
36bdfa54b4
2개의 변경된 파일, 81개의 추가와 45개의 삭제
  1. +64 −41
      main/download.py
  2. +17 −4
      main/run.py

+ 64 - 41
main/download.py

@@ -20,6 +20,17 @@ proxies = {"http": None, "https": None}
 
 
 class BSZF:
+    # 已下载视频列表
+    download_video_list = []
+
+    # 过滤关键字
+    @classmethod
+    def sensitive_words(cls):
+        sensitive_words = [
+            "早上好",
+            "晚上好",
+        ]
+        return sensitive_words
 
     @classmethod
     def get_recommend(cls):
@@ -228,49 +239,61 @@ class BSZF:
                 download_video_url = video.strip().split(" + ")[13]
                 download_video_session = video.strip().split(" + ")[-1]
 
-                Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
-                # 下载封面
-                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
-                # 下载视频
-                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
-                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
-                    f_a.write(str(download_video_id) + "\n" +
-                              str(download_video_title) + "\n" +
-                              str(download_video_duration) + "\n" +
-                              str(download_video_play_cnt) + "\n" +
-                              str(download_video_comment_cnt) + "\n" +
-                              str(download_video_like_cnt) + "\n" +
-                              str(download_video_share_cnt) + "\n" +
-                              str(download_video_resolution) + "\n" +
-                              str(download_video_send_time) + "\n" +
-                              str(download_user_name) + "\n" +
-                              str(download_head_url) + "\n" +
-                              str(download_video_url) + "\n" +
-                              str(download_cover_url) + "\n" +
-                              str(download_video_session))
-
-                # 上传视频
-                if env == "dev":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("dev", "play")
-                elif env == "prod":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("prod", "play")
+                if any(word if word in download_video_title else False for word in cls.sensitive_words()) is True:
+                    Common.crawler_log().info("视频已中敏感词,删除该视频信息:{}".format(download_video_title))
+                    # 删除该视频在benshanzhufu_feeds.txt中的信息
+                    with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
+                        lines = f_r.readlines()
+                    with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
+                        for line in lines:
+                            if download_video_id in line.split(" + ")[1]:
+                                continue
+                            f_w.write(line)
+                else:
+                    Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
+                    # 下载封面
+                    Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                    # 下载视频
+                    Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+                    # 保存视频信息至 benshanzhufu_videoid.txt
+                    with open(r"./txt/benshanzhufu_videoid.txt", "a", encoding="UTF-8") as fa:
+                        fa.write(download_video_id + "\n")
+                    # 添加视频 ID 到 list
+                    cls.download_video_list.append(download_video_id)
+                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                    with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
+                        f_a.write(str(download_video_id) + "\n" +
+                                  str(download_video_title) + "\n" +
+                                  str(download_video_duration) + "\n" +
+                                  str(download_video_play_cnt) + "\n" +
+                                  str(download_video_comment_cnt) + "\n" +
+                                  str(download_video_like_cnt) + "\n" +
+                                  str(download_video_share_cnt) + "\n" +
+                                  str(download_video_resolution) + "\n" +
+                                  str(download_video_send_time) + "\n" +
+                                  str(download_user_name) + "\n" +
+                                  str(download_head_url) + "\n" +
+                                  str(download_video_url) + "\n" +
+                                  str(download_cover_url) + "\n" +
+                                  str(download_video_session))
 
-                # 保存视频信息至 benshanzhufu_videoid.txt
-                with open(r"./txt/benshanzhufu_videoid.txt", "a", encoding="UTF-8") as fa:
-                    fa.write(download_video_id + "\n")
+                    # 上传视频
+                    if env == "dev":
+                        Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                        Publish.upload_and_publish("dev", "play")
+                    elif env == "prod":
+                        Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                        Publish.upload_and_publish("prod", "play")
 
-                # 删除该视频在benshanzhufu_feeds.txt中的信息
-                Common.crawler_log().info("删除该视频在benshanzhufu_feeds.txt中的信息:{}".format(download_video_title))
-                with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_video_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
+                    # 删除该视频在benshanzhufu_feeds.txt中的信息
+                    Common.crawler_log().info("删除该视频在benshanzhufu_feeds.txt中的信息:{}".format(download_video_title))
+                    with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
+                        lines = f_r.readlines()
+                    with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
+                        for line in lines:
+                            if download_video_id in line.split(" + ")[1]:
+                                continue
+                            f_w.write(line)
             except Exception as e:
                 # 删除该视频在 recommend.txt中的信息
                 Common.crawler_log().error("该视频信息异常,删除在benshanzhufu_feeds.txt中的信息:{}".format(e))

+ 17 - 4
main/run.py

@@ -17,15 +17,27 @@ class Main:
         """
         测试环境脚本
         """
-        Common.crawler_log().info("开始抓取本山祝福视频\n")
-        BSZF.get_recommend()
-        BSZF.download_video("dev")
+        if len(BSZF.download_video_list) >= 10:
+            Common.crawler_log().info("已下载视频数:{}".format(len(BSZF.download_video_list)))
+        else:
+            Common.crawler_log().info("开始抓取本山祝福视频\n")
+            BSZF.get_recommend()
+            BSZF.download_video("dev")
 
         # 删除多余日志
         Common.del_logs()
         # 统计累计下载数量
         Common.benshanzhufu_download_count()
 
+    @classmethod
+    def main_dev(cls):
+        while True:
+            main_dev_time = datetime.datetime.now()
+            if main_dev_time.hour >= 15:
+                cls.download_job_dev()
+            else:
+                break
+
     @classmethod
     def download_job_prod(cls):
         """
@@ -62,4 +74,5 @@ class Main:
 
 if __name__ == "__main__":
     main = Main()
-    main.main()
+    # main.main()
+    main.main_dev()