Browse Source

update start_t

wangkun 3 years ago
parent
commit
b918665879
4 changed files with 80 additions and 59 deletions
  1. 62 35
      main/hour_list.py
  2. 5 4
      main/person_list.py
  3. 10 17
      main/run_hour_list.py
  4. 3 3
      main/run_person_list.py

+ 62 - 35
main/hour_list.py

@@ -13,8 +13,21 @@ proxies = {"http": None, "https": None}
 
 
 class HourList:
-    # # 今天的日期:年-月-日
-    # today = datetime.datetime.now().strftime("%Y-%m-%d")
+    # Build the sensitive-word list used for filtering
+    @classmethod
+    def sensitive_words(cls):
+        # Flat list that accumulates every sensitive word
+        word_list = []
+        # Read the sensitive words via Feishu.get_values_batch and add each one to the list
+        lists = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
+        for i in lists:
+            for j in i:
+                # Skip empty cells (values that are None)
+                if j is None:
+                    pass
+                else:
+                    word_list.append(j)
+        return word_list
 
     # 下载规则
     @staticmethod
@@ -258,6 +271,10 @@ class HourList:
                     elif int(video_play_cnt) < 5000:
                         Common.logger().info("该视频7天内播放量<5000:{}", video_title)
 
+                    # 过滤敏感词
+                    elif any(word if word in video_title else False for word in cls.sensitive_words()) is True:
+                        Common.logger().info("视频已中敏感词:{}".format(video_title))
+
                     # 从云文档去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=onyBDH
                     elif video_id in [j for i in Feishu.get_values_batch("logs", "xiaoniangao", "ba0da4") for j in i]:
                         Common.logger().info("该视频已保存过:{}", video_title)
@@ -270,13 +287,20 @@ class HourList:
                         # 获取当前时间
                         get_feeds_time = int(time.time())
                         # 看一看云文档,工作表中写入数据
-                        values = [[profile_id, profile_mid, video_id, video_title, user_name, video_url, time.strftime(
-                                "%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
+                        values = [[profile_id,
+                                   profile_mid,
+                                   video_id,
+                                   video_title,
+                                   user_name,
+                                   video_duration,
+                                   cover_url,
+                                   video_url,
+                                   time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
                                    str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(get_feeds_time))),
                                    video_play_cnt]]
                         # 等待 1s,防止操作云文档太频繁,导致报错
                         time.sleep(1)
-                        Feishu.update_values("logs", "xiaoniangao", "ba0da4", "A3:I3", values)
+                        Feishu.update_values("logs", "xiaoniangao", "ba0da4", "A3:K3", values)
 
         except Exception as e:
             Common.logger().error("获取小时榜视频列表异常:{}", e)
@@ -285,12 +309,15 @@ class HourList:
     @classmethod
     def check_hour_list_data(cls, date):
         # 判断J1单元格的日期是否为今天
-        if Feishu.get_range_value("logs", "xiaoniangao", "ba0da4", "J1:J1")[0] != date:
-            # 插入3列 J1:L1,并写入日期和时间数据
+        if Feishu.get_range_value("logs", "xiaoniangao", "ba0da4", "L1:N1")[0] != date:
+            # 插入3列 L1:N1,并写入日期和时间数据
             values = [[date], ["10:00", "15:00", "20:00"]]
-            Feishu.insert_columns("logs", "xiaoniangao", "ba0da4", "COLUMNS", 9, 12)
-            Feishu.update_values("logs", "xiaoniangao", "ba0da4",  "J1:L2", values)
-            Feishu.merge_cells("logs", "xiaoniangao", "ba0da4", "J1:L1")
+            time.sleep(1)
+            Feishu.insert_columns("logs", "xiaoniangao", "ba0da4", "COLUMNS", 11, 14)
+            time.sleep(1)
+            Feishu.update_values("logs", "xiaoniangao", "ba0da4",  "L1:N2", values)
+            time.sleep(1)
+            Feishu.merge_cells("logs", "xiaoniangao", "ba0da4", "L1:N1")
             Common.logger().info("插入今天日期成功")
         else:
             Common.logger().info("今日上升榜日期已存在")
@@ -359,12 +386,12 @@ class HourList:
 
                         # 抓取时的播放量
                         v_play_cnt = Feishu.get_range_value(
-                            "logs", "xiaoniangao", "ba0da4", "I" + str(i) + ":" + "I" + str(i))[0]
+                            "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
                         Common.logger().info("视频详情,video_play_cnt:{},{}", v_play_cnt, type(v_play_cnt))
 
                         # 抓取时间
                         v_upload_time = Feishu.get_range_value(
-                            "logs", "xiaoniangao", "ba0da4", "H" + str(i) + ":" + "H" + str(i))[0]
+                            "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
                         Common.logger().info("视频详情,video_send_time:{},{}", v_upload_time, type(v_upload_time))
 
                         # 抓取时间:日期
@@ -441,7 +468,7 @@ class HourList:
                                 values = int(ten_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("10:00数据更新成功:{}", values)
 
                             elif upload_data == today and update_hour.hour == 15 and int(upload_hour) <= 10:
@@ -453,17 +480,17 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "L"+str(i) + ":" + "L"+str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "L"+str(i) + ":" + "L"+str(i))[0]
 
                                 # 15:00 的上升榜写入数据
                                 values = int(fifteen_hour_play_cnt) - (int(v_play_cnt) + int(ten_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
                             elif upload_data == today and update_hour.hour == 15 and 10 < int(upload_hour) <= 15:
@@ -477,7 +504,7 @@ class HourList:
                                 values = int(fifteen_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
                             elif upload_data == today and update_hour.hour == 20 and int(upload_hour) <= 10:
@@ -489,26 +516,26 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0]
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (
                                         int(v_play_cnt) + int(ten_up_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "N" + str(i) + ":" + "N" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
                             elif upload_data == today and update_hour.hour == 20 and 10 < int(upload_hour) <= 15:
@@ -520,17 +547,17 @@ class HourList:
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (int(v_play_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "N" + str(i) + ":" + "N" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
                             elif upload_data == today and update_hour.hour == 20 and 15 < int(upload_hour) <= 20:
@@ -544,7 +571,7 @@ class HourList:
                                 values = int(twenty_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "N" + str(i) + ":" + "N" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
                             elif (upload_data == yesterday or upload_data == before_yesterday)\
@@ -559,7 +586,7 @@ class HourList:
                                 values = int(ten_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("10:00数据更新成功:{}", values)
 
                             elif (upload_data == yesterday or upload_data == before_yesterday)\
@@ -572,17 +599,17 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0]
 
                                 # 15:00 的上升榜写入数据
                                 values = int(fifteen_hour_play_cnt) - (int(v_play_cnt) + int(ten_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
                             elif (upload_data == yesterday or upload_data == before_yesterday)\
@@ -595,26 +622,26 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i))[0]
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "M" + str(i) + ":" + "M" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (
                                         int(v_play_cnt) + int(ten_up_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "N" + str(i) + ":" + "N" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
                         except Exception as e:

+ 5 - 4
main/person_list.py

@@ -12,7 +12,8 @@ proxies = {"http": None, "https": None}
 
 
 class Person:
-    next_t = -1
+    # 翻页初始值
+    next_t_list = [-1]
 
     # 过滤敏感词
     @classmethod
@@ -131,7 +132,7 @@ class Person:
         data = {
             "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
             "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
-            "start_t": int(cls.next_t),
+            "start_t": int(cls.next_t_list[-1]),
             "limit": 5,
             "share_width": 625,
             "share_height": 500,
@@ -164,8 +165,8 @@ class Person:
         try:
             urllib3.disable_warnings()
             r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-            cls.next_t = r.json()["data"]["next_t"]
-            # cls.next_t_list.append(next_t)
+            next_t = r.json()["data"]["next_t"]
+            cls.next_t_list.append(next_t)
             feeds = r.json()["data"]["list"]
             for i in range(len(feeds)):
                 # 标题

+ 10 - 17
main/run_hour_list.py

@@ -12,40 +12,33 @@ from main.hour_list import HourList
 
 def hour_list_job():
     while True:
+        # 今天的日期:年-月-日
+        today = datetime.datetime.now().strftime("%Y-%m-%d")
+        # 昨天
+        yesterday = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
+        # 前天
+        before_yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
+
+        Common.logger().info("检查今日上升榜日期是否存在")
+        HourList.check_hour_list_data(today)
+
         while True:
             # 获取符合规则的视频,写入列表
             HourList.get_hour_list_feeds()
             time.sleep(1)
-
-            # 今天的日期:年-月-日
-            today = datetime.datetime.now().strftime("%Y-%m-%d")
-            # 昨天
-            yesterday = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
-            # 前天
-            before_yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
-
             hour_list_job_time = datetime.datetime.now()
             if hour_list_job_time.hour == 10 and 0 <= hour_list_job_time.minute <= 10:
 
-                Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data(today)
-
                 Common.logger().info("开始更新上升榜")
                 HourList.update_hour_list_data(today, yesterday, before_yesterday)
 
             elif hour_list_job_time.hour == 15 and hour_list_job_time.minute <= 10:
 
-                Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data(today)
-
                 Common.logger().info("开始更新上升榜")
                 HourList.update_hour_list_data(today, yesterday, before_yesterday)
 
             elif hour_list_job_time.hour == 20 and hour_list_job_time.minute <= 10:
 
-                Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data(today)
-
                 Common.logger().info("开始更新上升榜")
                 HourList.update_hour_list_data(today, yesterday, before_yesterday)
 

+ 3 - 3
main/run_person.py → main/run_person_list.py

@@ -25,12 +25,12 @@ def person_list_job():
                 Person.download_from_sub(endtime)
             elif person_list_time.hour == 23 and person_list_time.minute >= 50:
                 Common.person_logger().info("结束今日抓取任务")
-                Person.next_t = -1
+                Person.next_t_list = [-1]
                 break
             else:
-                Common.person_logger().info("发布时间于2022年5月18日,结束抓取任务")
+                Common.person_logger().info("发布时间于2022年5月18日,结束抓取任务")
                 time.sleep(3600)
-                Person.next_t = -1
+                Person.next_t_list = [-1]
                 break