3 anni fa · 379f2dcb0a
--- a/main/common.py
+++ b/main/common.py
@@ -126,6 +126,23 @@ class Common:
 
				         with open(r"./txt/" + t_name, "r", encoding="UTF-8") as f:
			
 
				             return f.readlines()
			
 
				 
			
 
				+    @classmethod
				+    def del_content_in_txt(cls, d_content, d_filename):
				+        """
				+        Remove every line of ./txt/<d_filename> whose second " + "-separated
				+        field contains d_content, then log the deletion.
				+
				+        The lines to keep are computed in memory before the file is reopened
				+        for writing, so an unexpected line can no longer abort the rewrite
				+        after the file has already been truncated. Lines that have no second
				+        field cannot match and are written back unchanged.
				+
				+        :param d_content: text to look for in the second field of each line
				+        :param d_filename: name of the file under ./txt/ to rewrite
				+        :return: None
				+        """
				+        txt_path = r"./txt/" + d_filename
				+        with open(txt_path, "r", encoding="UTF-8") as f_r:
				+            lines = f_r.readlines()
				+
				+        kept_lines = []
				+        for line in lines:
				+            fields = line.split(" + ")
				+            # A line without a second field (e.g. a blank line) used to raise
				+            # IndexError here, after the file was truncated; keep it instead.
				+            if len(fields) > 1 and d_content in fields[1]:
				+                continue
				+            kept_lines.append(line)
				+
				+        # Open for writing only once the new content is fully known.
				+        with open(txt_path, "w", encoding="utf-8") as f_w:
				+            f_w.writelines(kept_lines)
				+        cls.crawler_log().info("删除{}中的{}成功".format(d_filename, d_content))
			
 
				+
			
 
				     @classmethod
			
 
				     def benshanzhufu_download_count(cls):
			
 
				         videoid_path = r"./txt/benshanzhufu_videoid.txt"
			
--- a/main/download.py
+++ b/main/download.py
@@ -29,6 +29,30 @@ class BSZF:
 
				         sensitive_words = [
			
 
				             "早上好",
			
 
				             "晚上好",
			
 
				+            "中午好",
			
 
				+            "最美祝福",
			
 
				+            "祝福",
			
 
				+            "新年好",
			
 
				+            "立春",
			
 
				+            "雨水",
			
 
				+            "惊蛰",
			
 
				+            "春分",
			
 
				+            "清明",
			
 
				+            "谷雨",
			
 
				+            "小暑",
			
 
				+            "大暑",
			
 
				+            "立秋",
			
 
				+            "处暑",
			
 
				+            "白露",
			
 
				+            "秋分",
			
 
				+            "寒露",
			
 
				+            "霜降",
			
 
				+            "立冬",
			
 
				+            "小雪",
			
 
				+            "大雪",
			
 
				+            "冬至",
			
 
				+            "小寒",
			
 
				+            "大寒",
			
 
				         ]
			
 
				         return sensitive_words
			
 
				 
			
@@ -242,13 +266,7 @@ class BSZF:
 
				                 if any(word if word in download_video_title else False for word in cls.sensitive_words()) is True:
			
 
				                     Common.crawler_log().info("视频已中敏感词，删除该视频信息：{}".format(download_video_title))
			
 
				                     # 删除该视频在benshanzhufu_feeds.txt中的信息
			
 
				-                    with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r:
			
 
				-                        lines = f_r.readlines()
			
 
				-                    with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w:
			
 
				-                        for line in lines:
			
 
				-                            if download_video_id in line.split(" + ")[1]:
			
 
				-                                continue
			
 
				-                            f_w.write(line)
			
 
				+                    Common.del_content_in_txt(download_video_id, "benshanzhufu_feeds.txt")
			
 
				                 else:
			
 
				                     Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
			
 
				                     # 下载封面
			
@@ -285,25 +303,16 @@ class BSZF:
 
				                         Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
			
 
				                         Publish.upload_and_publish("prod", "play")
			
 
				 
			
 
				-                    # 删除该视频在benshanzhufu_feeds.txt中的信息
			
 
				-                    Common.crawler_log().info("删除该视频在benshanzhufu_feeds.txt中的信息:{}".format(download_video_title))
			
 
				-                    with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r3:
			
 
				-                        lines = f_r3.readlines()
			
 
				-                    with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w3:
			
 
				-                        for line in lines:
			
 
				-                            if download_video_id in line.split(" + ")[1]:
			
 
				-                                continue
			
 
				-                            f_w3.write(line)
			
 
				+                    try:
			
 
				+                        Common.del_content_in_txt(download_video_id, "benshanzhufu_feeds.txt")
			
 
				+                    except Exception as e:
			
 
				+                        Common.crawler_log().error("删除benshanzhufu_feeds.txt中的{}失败，重新删除:{}".format(download_video_id, e))
			
 
				+                        Common.del_content_in_txt(download_video_id, "benshanzhufu_feeds.txt")
			
 
				+
			
 
				             except Exception as e:
			
 
				-                # 删除该视频在 recommend.txt中的信息
			
 
				-                Common.crawler_log().error("该视频信息异常，删除在benshanzhufu_feeds.txt中的信息:{}".format(e))
			
 
				-                with open(r"./txt/benshanzhufu_feeds.txt", "r", encoding="UTF-8") as f_r4:
			
 
				-                    lines = f_r4.readlines()
			
 
				-                with open(r"./txt/benshanzhufu_feeds.txt", "w", encoding="utf-8") as f_w4:
			
 
				-                    for line in lines:
			
 
				-                        if download_video_id in line.split(" + ")[1]:
			
 
				-                            continue
			
 
				-                        f_w4.write(line)
			
 
				+                Common.crawler_log().error("下载视频异常:{}".format(e))
			
 
				+                # Remove this video's entry from benshanzhufu_feeds.txt
			
 
				+                Common.del_content_in_txt(download_video_id, "benshanzhufu_feeds.txt")
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
--- a/main/run.py
+++ b/main/run.py
@@ -44,7 +44,7 @@ class Main:
 
				         """
			
 
				         正式环境脚本
			
 
				         """
			
 
				-        if len(BSZF.download_video_list) >= 300:
			
 
				+        if len(BSZF.download_video_list) >= 200:
			
 
				             Common.crawler_log().info("已下载视频数：{}".format(len(BSZF.download_video_list)))
			
 
				             time.sleep(1800)
			
 
				         else:
			
@@ -62,7 +62,7 @@ class Main:
 
				         while True:
			
 
				             while True:
			
 
				                 main_time = datetime.datetime.now()
			
 
				-                if main_time.hour >= 15:
			
 
				+                if main_time.hour >= 11:
			
 
				                     cls.download_job_prod()
			
 
				                 else:
			
 
				                     break
			
--- a/抓取规则.txt
+++ b/抓取规则.txt
@@ -1,3 +1,10 @@
 
				+==========2022/4/29===========
			
 
				+- 增加敏感词过滤
			
 
				+- 每天 11 点开始爬取，上限 200 条
			
 
				+- 上传账号：[20631241, 20631242, 20631244, 20631245, 20631246, 20631247]
			
 
				+==============================
			
 
				+
			
 
				+
			
 
				 ==========2022/4/27===========
			
 
				 - 全爬
			
 
				 - 每天9-12点进行爬取