Sfoglia il codice sorgente

接入敏感词过滤

wangkun 3 anni fa
parent
commit
f2930ba5f1
4 file modificati con 89 aggiunte e 15 eliminazioni
  1. 23 4
      main/download_play.py
  2. 27 9
      main/download_sendtime.py
  3. 20 2
      main/download_up.py
  4. 19 0
      main/get_feeds.py

+ 23 - 4
main/download_play.py

@@ -17,7 +17,7 @@ from main.feishu_lib import Feishu
 
 
 class DownloadPlay:
-
+    # 下载规则
     @staticmethod
     def play_rule(play_width, play_height, play_duration, play_play_cnt):
         """
@@ -77,7 +77,8 @@ class DownloadPlay:
                             # 下载视频
                             Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
                             # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                            with open(r"./videos/" + download_video_title + r"/info.txt", "a", encoding="utf8") as f_a:
+                            with open(r"./videos/" + download_video_title
+                                      + "/" + "info.txt", "a", encoding="utf8") as f_a:
                                 f_a.write(str(download_video_id) + "\n" +
                                           str(download_video_title) + "\n" +
                                           str(download_video_duration) + "\n" +
@@ -92,6 +93,7 @@ class DownloadPlay:
                                           str(download_video_url) + "\n" +
                                           str(download_cover_url) + "\n" +
                                           str(download_video_session))
+                            Common.logger().info("==========视频信息已保存至info.txt==========")
 
                             # 上传视频
                             Common.logger().info("开始上传视频:{}", download_video_title)
@@ -102,8 +104,25 @@ class DownloadPlay:
                             # 看一看+ ,视频ID工作表,插入首行
                             Feishu.insert_columns("20ce0c")
                             # 看一看+ ,视频ID工作表,首行写入数据
-                            Feishu.update_values("20ce0c", download_video_id, "", "", "",
-                                                 "", "", "", "", "", "", "", "", "", "", "")
+                            upload_time = int(time.time())
+                            Feishu.update_values("20ce0c",
+                                                 str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time))),
+                                                 str(download_video_id),
+                                                 str(download_video_play_cnt),
+                                                 str(download_video_title),
+                                                 str(download_video_duration),
+                                                 str(download_video_comment_cnt),
+                                                 str(download_video_like_cnt),
+                                                 str(download_video_share_cnt),
+                                                 str(download_video_resolution),
+                                                 str(time.strftime("%Y-%m-%d %H:%M:%S",
+                                                                   time.localtime(
+                                                                       int(download_video_send_time)))),
+                                                 str(download_user_name),
+                                                 str(download_head_url),
+                                                 str(download_cover_url),
+                                                 str(download_video_url),
+                                                 str(download_video_session))
 
                             # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                             Common.logger().info("从云文档删除该视频信息:{}", download_video_title)

+ 27 - 9
main/download_sendtime.py

@@ -98,14 +98,14 @@ class DownloadSendtime:
                             v_video_cover = data["cover_url"]
                             if "items" not in data["play_info"]:
                                 if len(data["play_info"]) > 2:
-                                    download_url_up = data["play_info"][2]["play_url"]
+                                    download_url_sendtime = data["play_info"][2]["play_url"]
                                 else:
-                                    download_url_up = data["play_info"][0]["play_url"]
+                                    download_url_sendtime = data["play_info"][0]["play_url"]
                             else:
                                 if len(data["play_info"]["items"]) > 2:
-                                    download_url_up = data["play_info"]["items"][2]["play_url"]
+                                    download_url_sendtime = data["play_info"]["items"][2]["play_url"]
                                 else:
-                                    download_url_up = data["play_info"]["items"][0]["play_url"]
+                                    download_url_sendtime = data["play_info"]["items"][0]["play_url"]
 
                             # 判断基本规则
                             if download_video_id not in [j for i in Feishu.get_values_batch("20ce0c") for j in i]\
@@ -114,7 +114,7 @@ class DownloadSendtime:
                                     and v_play_cnt_sendtime != "" and v_comment_cnt != "" and v_liked_cnt != "" \
                                     and v_shared_cnt != "" and v_width != "" and v_height != "" \
                                     and v_send_date != "" and v_username != "" and v_user_cover != "" \
-                                    and v_video_cover != "" and download_url_up != "":
+                                    and v_video_cover != "" and download_url_sendtime != "":
                                 # Download condition: now - publish time <= 604800 s (7 days) and play count >= 10,000
                                 if int(time.time()) - int(v_send_date) <= 604800:
                                     if int(v_play_cnt_sendtime) >= 10000:
@@ -124,7 +124,7 @@ class DownloadSendtime:
                                         # 下载封面
                                         Common.download_method("cover", download_video_title, v_video_cover)
                                         # 下载视频
-                                        Common.download_method("video", download_video_title, download_url_up)
+                                        Common.download_method("video", download_video_title, download_url_sendtime)
                                         # Save the video info to "./videos/{download_video_title}/info.txt"
                                         with open(r"./videos/" + download_video_title +
                                                   "/" + "info.txt", "a", encoding="utf8") as f_a2:
@@ -139,9 +139,10 @@ class DownloadSendtime:
                                                        str(v_send_date) + "\n" +
                                                        str(v_username) + "\n" +
                                                        str(v_user_cover) + "\n" +
-                                                       str(download_url_up) + "\n" +
+                                                       str(download_url_sendtime) + "\n" +
                                                        str(v_video_cover) + "\n" +
                                                        str(sendtime_session))
+                                        Common.logger().info("==========视频信息已保存至info.txt==========")
 
                                         # 上传该视频
                                         Common.logger().info("开始上传视频:{}", download_video_title)
@@ -153,8 +154,25 @@ class DownloadSendtime:
                                         # 看一看+ ,视频ID工作表,插入首行
                                         Feishu.insert_columns("20ce0c")
                                         # 看一看+ ,视频ID工作表,首行写入数据
-                                        Feishu.update_values("20ce0c", download_video_id, "", "", "",
-                                                             "", "", "", "", "", "", "", "", "", "", "")
+                                        upload_time = int(time.time())
+                                        Feishu.update_values("20ce0c",
+                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
+                                                                               time.localtime(upload_time))),
+                                                             str(download_video_id),
+                                                             str(v_play_cnt_sendtime),
+                                                             str(download_video_title),
+                                                             str(v_duration),
+                                                             str(v_comment_cnt),
+                                                             str(v_liked_cnt),
+                                                             str(v_shared_cnt),
+                                                             str(v_resolution),
+                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
+                                                                               time.localtime(int(v_send_date)))),
+                                                             str(v_username),
+                                                             str(v_user_cover),
+                                                             str(v_video_cover),
+                                                             str(download_url_sendtime),
+                                                             str(sendtime_session))
 
                                         # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                                         Common.logger().info("从云文档删除该视频信息:{}", download_video_title)

+ 20 - 2
main/download_up.py

@@ -154,6 +154,7 @@ class DownloadUp:
                                                        str(download_url_up) + "\n" +
                                                        str(v_video_cover) + "\n" +
                                                        str(video_info_session))
+                                        Common.logger().info("==========视频信息已保存至info.txt==========")
 
                                         # 上传该视频
                                         Common.logger().info("开始上传视频:{}", download_video_title)
@@ -165,8 +166,25 @@ class DownloadUp:
                                         # 看一看+ ,视频ID工作表,插入首行
                                         Feishu.insert_columns("20ce0c")
                                         # 看一看+ ,视频ID工作表,首行写入数据
-                                        Feishu.update_values("20ce0c", download_video_id, "", "", "",
-                                                             "", "", "", "", "", "", "", "", "", "", "")
+                                        upload_time = int(time.time())
+                                        Feishu.update_values("20ce0c",
+                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
+                                                                               time.localtime(upload_time))),
+                                                             str(download_video_id),
+                                                             str(v_play_cnt_up),
+                                                             str(download_video_title),
+                                                             str(v_duration),
+                                                             str(v_comment_cnt),
+                                                             str(v_liked_cnt),
+                                                             str(v_shared_cnt),
+                                                             str(v_resolution),
+                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
+                                                                               time.localtime(int(v_send_date)))),
+                                                             str(v_username),
+                                                             str(v_user_cover),
+                                                             str(v_video_cover),
+                                                             str(download_url_up),
+                                                             str(video_info_session))
 
                                         # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                                         Common.logger().info("从云文档删除该视频信息:{}", download_video_title)

+ 19 - 0
main/get_feeds.py

@@ -20,6 +20,22 @@ from main.common import Common
 proxies = {"http": None, "https": None}
 
 
+# Sensitive-word list
+def kanyikan_sensitive_words():
+    """Return the sensitive words stored in the cloud sheet as a flat list.
+
+    Reads sheet "rofdM5" through Feishu.get_values_batch, walks the result
+    row by row and collects every cell whose value is not None.
+
+    :return: list of the non-None cell values, in row-then-column order
+    """
+    rows = Feishu.get_values_batch("rofdM5")
+    # Flatten the rows; cells that come back as None are skipped.
+    return [cell for row in rows for cell in row if cell is not None]
+
+
 def get_feeds():
     """
     1.从看一看+小程序首页推荐,获取视频列表
@@ -179,6 +195,9 @@ def get_feeds():
                             or video_cover == "" \
                             or url == "":
                         Common.logger().info("无效视频")
+                    # 过滤敏感词
+                    elif any(word if word in video_title else False for word in kanyikan_sensitive_words()) is True:
+                        Common.logger().info("视频已中敏感词:{}".format(video_title))
                     # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
                     elif video_id in [j for i in Feishu.get_values_batch("20ce0c") for j in i]:
                         Common.logger().info("该视频已下载:{}", video_title)