Переглянути джерело

增加关注榜单下载及上传

wangkun 3 роки тому
батько
коміт
f8551fde12
10 змінених файлів з 599 додано та 241 видалено
  1. 18 2
      README.md
  2. 53 25
      main/common.py
  3. 15 8
      main/demo.py
  4. 0 81
      main/download_person.py
  5. 86 46
      main/feishu_lib.py
  6. 69 66
      main/hour_list.py
  7. 304 0
      main/person_list.py
  8. 4 5
      main/publish.py
  9. 15 8
      main/run_hour_list.py
  10. 35 0
      main/run_person.py

+ 18 - 2
README.md

@@ -1,4 +1,4 @@
-小年糕爬虫 Mac 版本
+小年糕爬虫
 
 7 天内,播放量>=5000
 时长 1-10min
@@ -6,4 +6,20 @@
 每小时新增播放数据
 
 爬取时间要小于上升榜时间
-上升榜中写入的数据:当前播放量 - 上个时间段的播放量
+上升榜中写入的数据:当前播放量 - 上个时间段的播放量
+
+上传视频时,info.txt文件中的视频信息包含:
+str(download_video_id)
+str(download_video_title)
+str(download_video_duration)
+str(download_video_play_cnt)
+str(download_video_comment_cnt)
+str(download_video_like_cnt)
+str(download_video_share_cnt)
+str(download_video_resolution)
+str(download_video_send_time)
+str(download_user_name)
+str(download_head_url)
+str(download_video_url)
+str(download_cover_url)
+str(download_video_session)

+ 53 - 25
main/common.py

@@ -87,29 +87,41 @@ class Common:
         :d_dir: 需要删除的 log 地址
         :return: 保留最近 7 个日志
         """
-        global logs_dir
         if d_dir == "logs":
             logs_dir = "./logs/"
+            all_files = sorted(os.listdir(logs_dir))
+            all_logs = []
+            for log in all_files:
+                name = os.path.splitext(log)[-1]
+                if name == ".log":
+                    all_logs.append(log)
+
+            if len(all_logs) <= 7:
+                pass
+            else:
+                for file in all_logs[:len(all_logs) - 7]:
+                    os.remove(logs_dir + file)
+            cls.logger().info("清除冗余日志成功")
+
         elif d_dir == "person-logs":
             logs_dir = "./person-logs/"
-
-        all_files = sorted(os.listdir(logs_dir))
-        all_logs = []
-        for log in all_files:
-            name = os.path.splitext(log)[-1]
-            if name == ".log":
-                all_logs.append(log)
-
-        if len(all_logs) <= 7:
-            pass
-        else:
-            for file in all_logs[:len(all_logs) - 7]:
-                os.remove(logs_dir + file)
-        cls.logger().info("清除冗余日志成功")
+            all_files = sorted(os.listdir(logs_dir))
+            all_logs = []
+            for log in all_files:
+                name = os.path.splitext(log)[-1]
+                if name == ".log":
+                    all_logs.append(log)
+
+            if len(all_logs) <= 7:
+                pass
+            else:
+                for file in all_logs[:len(all_logs) - 7]:
+                    os.remove(logs_dir + file)
+            cls.person_logger().info("清除冗余日志成功")
 
     # 封装下载视频或封面的方法
     @classmethod
-    def download_method(cls, text, d_name, d_url):
+    def download_method(cls, log_path, text, d_name, d_url):
         """
         下载封面:text == "cover" ; 下载视频:text == "video"
         需要下载的视频标题:d_title
@@ -117,8 +129,8 @@ class Common:
         下载保存路径:"./files/{d_title}/"
         """
         # 首先创建一个保存该视频相关信息的文件夹
-        # video_dir = "./videos/" + d_name + "/"
-        video_dir = "./videos/"
+        video_dir = "./videos/" + d_name + "/"
+        # video_dir = "./videos/"
         if not os.path.exists(video_dir):
             os.mkdir(video_dir)
 
@@ -127,7 +139,9 @@ class Common:
             # 需要下载的视频地址
             video_url = d_url
             # 视频名
-            video_name = d_name + ".mp4"
+            video_name = "video.mp4"
+            # # 视频名
+            # video_name = d_name + ".mp4"
 
             # 下载视频
             urllib3.disable_warnings()
@@ -136,16 +150,24 @@ class Common:
                 with open(video_dir + video_name, "wb") as f:
                     for chunk in response.iter_content(chunk_size=10240):
                         f.write(chunk)
-                cls.logger().info("==========视频下载完成==========")
+                if log_path == "logs":
+                    cls.logger().info("==========视频下载完成==========")
+                elif log_path == "person-logs":
+                    cls.person_logger().info("==========视频下载完成==========")
             except Exception as e:
-                cls.logger().exception("视频下载失败:{}", e)
+                if log_path == "logs":
+                    cls.logger().exception("视频下载失败:{}", e)
+                elif log_path == "person-logs":
+                    cls.person_logger().exception("视频下载失败:{}", e)
 
         # 下载封面
         elif text == "cover":
             # 需要下载的封面地址
             cover_url = d_url
             # 封面名
-            cover_name = d_name + ".jpg"
+            cover_name = "image.jpg"
+            # # 封面名
+            # cover_name = d_name + ".jpg"
 
             # 下载封面
             urllib3.disable_warnings()
@@ -153,10 +175,16 @@ class Common:
             try:
                 with open(video_dir + cover_name, "wb") as f:
                     f.write(response.content)
-                cls.logger().info("==========封面下载完成==========")
+                if log_path == "logs":
+                    cls.logger().info("==========封面下载完成==========")
+                elif log_path == "person-logs":
+                    cls.person_logger().info("==========封面下载完成==========")
             except Exception as e:
-                cls.logger().exception("封面下载失败:{}", e)
+                if log_path == "logs":
+                    cls.logger().exception("封面下载失败:{}", e)
+                elif log_path == "person-logs":
+                    cls.person_logger().exception("封面下载失败:{}", e)
 
 
 if __name__ == "__main__":
-    common = Common()
+    common = Common()

+ 15 - 8
main/demo.py

@@ -18,12 +18,19 @@ import time
 # print(type(time2))
 # print(time3)
 # print(time4)
-yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
-update_hour = datetime.datetime.now().strftime("%Y-%m-%d")
-print(type(yesterday))
-print(yesterday)
-print(type(update_hour))
-print(update_hour)
-
-
+# yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
+# update_hour = datetime.datetime.now().strftime("%Y-%m-%d")
+# print(type(yesterday))
+# print(yesterday)
+# print(type(update_hour))
+# print(update_hour)
+# print(int("-1"))
 
+list1 = [-1]
+print(list1[-1])
+a = "a"
+list1.append(a)
+print(list1[-1])
+b = "b"
+list1.append(b)
+print(list1[-1])

+ 0 - 81
main/download_person.py

@@ -1,81 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/5/18
-import time
-
-import requests
-
-from main.common import Common
-from main.feishu_lib import Feishu
-
-
-class Person:
-    # 个人作品列表页
-    @classmethod
-    def get_person_list(cls):
-        try:
-            if len(Feishu.get_values_batch("oNpThi")) == 1:
-                print(len(Feishu.get_values_batch("oNpThi")))
-                print(Feishu.get_values_batch("oNpThi"))
-                Common.person_logger().info("暂无定向爬取账号")
-            else:
-                for i in range(1, len(Feishu.get_values_batch("oNpThi"))+1):
-                    time.sleep(1)
-                    Common.person_logger().info("")
-
-                    url = "https://api.xiaoniangao.cn/profile/list_album"
-                    headers = {
-                        "X-Mid": "1164637358",
-                        "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
-                        "content-type": "application/json",
-                        "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
-                        "Accept-Encoding": "gzip,compress,br,deflate",
-                        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                                      " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                                      "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-                        "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
-                    }
-                    data = {
-                        "visited_mid": "260159327",
-                        "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!690x385r/crop/690x385/interlace/1/format/jpg",
-                        "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!120x120r/crop/120x120/interlace/1/format/jpg",
-                        "limit": 20,
-                        "token": "451273638af2c8bb90266bcfaf601a68",
-                        "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
-                        "proj": "ma",
-                        "wx_ver": "8.0.20",
-                        "code_ver": "3.62.0",
-                        "log_common_params": {
-                            "e": [{
-                                "data": {
-                                    "page": "profilePage",
-                                    "topic": "public"
-                                }
-                            }],
-                            "ext": {
-                                "brand": "iPhone",
-                                "device": "iPhone 11",
-                                "os": "iOS 14.7.1",
-                                "weixinver": "8.0.20",
-                                "srcver": "2.24.2",
-                                "net": "wifi",
-                                "scene": "1089"
-                            },
-                            "pj": "1",
-                            "pf": "2",
-                            "session_id": "ba9b042f-5150-4c3e-a5da-b2fc4181b954"
-                        }
-                    }
-                    r = requests.post(url=url, headers=headers, json=data)
-                    feeds = r.json()["data"]["list"]
-                    # for i in feeds:
-                    #     print(i)
-                    values = [["111", "222", "333", "444"]]
-                    Feishu.update_hour_list_values("yatRv2", "A4:D4", values)
-        except Exception as e:
-            Common.person_logger().info("个人作品列表页异常:{}", e)
-
-
-if __name__ == "__main__":
-    person = Person()
-    person.get_person_list()

+ 86 - 46
main/feishu_lib.py

@@ -1,9 +1,7 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/5/11
-import datetime
 import json
-import time
 
 import requests
 import urllib3
@@ -28,7 +26,7 @@ class Feishu:
 
     # 飞书路径token
     @classmethod
-    def spreadsheetToken(cls, crawler):
+    def spreadsheettoken(cls, crawler):
         """
         :param crawler: 哪个爬虫
         """
@@ -41,9 +39,9 @@ class Feishu:
         elif crawler == "xiaoniangao":
             return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
 
-            # 获取飞书api token
+    # 获取飞书api token
     @classmethod
-    def get_token(cls):
+    def get_token(cls, log_path):
         """
         获取飞书api token
         :return:
@@ -58,20 +56,23 @@ class Feishu:
             tenant_access_token = response.json()["tenant_access_token"]
             return tenant_access_token
         except Exception as e:
-            Common.logger().error("获取飞书 api token 异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("获取飞书 api token 异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("获取飞书 api token 异常:{}", e)
 
     # 获取表格元数据
     @classmethod
-    def get_metainfo(cls, crawler):
+    def get_metainfo(cls, log_path, crawler):
         """
         获取表格元数据
         :return:
         """
-        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                           + cls.spreadsheetToken(crawler) + "/metainfo"
+        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                           + cls.spreadsheettoken(crawler) + "/metainfo"
 
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -84,21 +85,25 @@ class Feishu:
             response = json.loads(r.content.decode("utf8"))
             return response
         except Exception as e:
-            Common.logger().error("获取表格元数据异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("获取表格元数据异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("获取表格元数据异常:{}", e)
 
     # 读取工作表中所有数据
     @classmethod
-    def get_values_batch(cls, crawler, sheetid):
+    def get_values_batch(cls, log_path, crawler, sheetid):
         """
         读取工作表中所有数据
+        :param log_path: 启用哪个 log
         :param crawler: 哪个爬虫
         :param sheetid: 哪张表
         :return: 所有数据
         """
-        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                               + cls.spreadsheetToken(crawler) + "/values_batch_get"
+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -124,23 +129,27 @@ class Feishu:
             values = response["data"]["valueRanges"][0]["values"]
             return values
         except Exception as e:
-            Common.logger().error("读取工作表所有数据异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("读取工作表所有数据异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("读取工作表所有数据异常:{}", e)
 
     # 工作表,插入行或列
     @classmethod
-    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
+    def insert_columns(cls, log_path, crawler, sheetid, majordimension, startindex, endindex):
         """
         工作表插入行或列
+        :param log_path: 日志路径
         :param crawler: 哪个爬虫
         :param sheetid:哪张工作表
         :param majordimension:行或者列
         :param startindex:开始位置
         :param endindex:结束位置
         """
-        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                             + cls.spreadsheetToken(crawler) + "/insert_dimension_range"
+        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -155,24 +164,31 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("插入行或列:{}", r.json()["msg"])
+            if log_path == "logs":
+                Common.logger().info("插入行或列:{}", r.json()["msg"])
+            elif log_path == "person-logs":
+                Common.person_logger().info("插入行或列:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("插入行或列异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("插入行或列异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("插入行或列异常:{}", e)
 
     # 写入数据
     @classmethod
-    def update_values(cls, crawler, sheetid, ranges, values):
+    def update_values(cls, log_path, crawler, sheetid, ranges, values):
         """
         写入数据
+        :param log_path: 日志路径
         :param crawler: 哪个爬虫
         :param sheetid:哪张工作表
         :param ranges:单元格范围
         :param values:写入的具体数据,list
         """
-        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                            + cls.spreadsheetToken(crawler) + "/values_batch_update"
+        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -187,23 +203,30 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("写入数据:{}", r.json()["msg"])
+            if log_path == "logs":
+                Common.logger().info("写入数据:{}", r.json()["msg"])
+            elif log_path == "person-logs":
+                Common.person_logger().info("写入数据:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("写入数据异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("写入数据异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("写入数据异常:{}", e)
 
     # 合并单元格
     @classmethod
-    def merge_cells(cls, crawler, sheetid, ranges):
+    def merge_cells(cls, log_path, crawler, sheetid, ranges):
         """
         合并单元格
+        :param log_path: 日志路径
         :param crawler: 哪个爬虫
         :param sheetid:哪张工作表
         :param ranges:需要合并的单元格范围
         """
-        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                          + cls.spreadsheetToken(crawler) + "/merge_cells"
+        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                          + cls.spreadsheettoken(crawler) + "/merge_cells"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
 
@@ -215,24 +238,31 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("合并单元格:{}", r.json()["msg"])
+            if log_path == "logs":
+                Common.logger().info("合并单元格:{}", r.json()["msg"])
+            elif log_path == "person-logs":
+                Common.person_logger().info("合并单元格:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("合并单元格异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("合并单元格异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("合并单元格异常:{}", e)
 
     # 读取单元格数据
     @classmethod
-    def get_range_value(cls, crawler, sheetid, cell):
+    def get_range_value(cls, log_path, crawler, sheetid, cell):
         """
         读取单元格内容
+        :param log_path: 日志路径
         :param crawler: 哪个爬虫
         :param sheetid: 哪张工作表
         :param cell: 哪个单元格
         :return: 单元格内容
         """
-        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                              + cls.spreadsheetToken(crawler) + "/values/" + sheetid + "!" + cell
+        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -253,13 +283,17 @@ class Feishu:
             r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
             return r.json()["data"]["valueRange"]["values"][0]
         except Exception as e:
-            Common.logger().error("读取单元格数据异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("读取单元格数据异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("读取单元格数据异常:{}", e)
 
     # 删除行或列,可选 ROWS、COLUMNS
     @classmethod
-    def dimension_range(cls, crawler, sheetid, major_dimension, startindex, endindex):
+    def dimension_range(cls, log_path, crawler, sheetid, major_dimension, startindex, endindex):
         """
         删除行或列
+        :param log_path: 日志路径
         :param crawler: 哪个爬虫
         :param sheetid:工作表
         :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
@@ -267,10 +301,10 @@ class Feishu:
         :param endindex:结束的位置
         :return:
         """
-        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-                              + cls.spreadsheetToken(crawler) + "/dimension_range"
+        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_path),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -284,9 +318,15 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("删除视频数据:{}", r.json()["msg"])
+            if log_path == "logs":
+                Common.logger().info("删除视频数据:{}", r.json()["msg"])
+            elif log_path == "person-logs":
+                Common.person_logger().info("删除视频数据:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("删除视频数据异常:{}", e)
+            if log_path == "logs":
+                Common.logger().error("删除视频数据异常:{}", e)
+            elif log_path == "person-logs":
+                Common.person_logger().error("删除视频数据异常:{}", e)
 
 
 if __name__ == "__main__":
@@ -310,7 +350,7 @@ if __name__ == "__main__":
     # print(int(feishu.get_range_value("xiaoniangao", "ba0da4", "G6:G6")[0].split(" ")[-1].split(":")[0]))
     # print(feishu.get_range_value("xiaoniangao", "ba0da4", "G6:G6")[0].split(" ")[0])
 
-    feishu.update_values("xiaoniangao", "ba0da4", "H4:H4", [["2022-05-18 21:14:27"]])
+    feishu.update_values("logs", "xiaoniangao", "ba0da4", "H4:H4", [["2022-05-18 21:14:27"]])
 
     # 看一看+工作表,插入首行
     # print(feishu.insert_columns("k2rKkv", "COLUMNS", 6, 9))

+ 69 - 66
main/hour_list.py

@@ -13,12 +13,8 @@ proxies = {"http": None, "https": None}
 
 
 class HourList:
-    # 今天的日期:年-月-日
-    today = datetime.datetime.now().strftime("%Y-%m-%d")
-    # 昨天
-    yesterday = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
-    # 前天
-    before_yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
+    # # 今天的日期:年-月-日
+    # today = datetime.datetime.now().strftime("%Y-%m-%d")
 
     # 下载规则
     @staticmethod
@@ -263,13 +259,13 @@ class HourList:
                         Common.logger().info("该视频7天内播放量<5000:{}", video_title)
 
                     # 从云文档去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=onyBDH
-                    elif video_id in [j for i in Feishu.get_values_batch("xiaoniangao", "ba0da4") for j in i]:
+                    elif video_id in [j for i in Feishu.get_values_batch("logs", "xiaoniangao", "ba0da4") for j in i]:
                         Common.logger().info("该视频已保存过:{}", video_title)
                     else:
                         Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
                         # feeds工作表,插入空行
                         time.sleep(1)
-                        Feishu.insert_columns("xiaoniangao", "ba0da4", "ROWS", 2, 3)
+                        Feishu.insert_columns("logs", "xiaoniangao", "ba0da4", "ROWS", 2, 3)
 
                         # 获取当前时间
                         get_feeds_time = int(time.time())
@@ -280,21 +276,21 @@ class HourList:
                                    video_play_cnt]]
                         # 等待 1s,防止操作云文档太频繁,导致报错
                         time.sleep(1)
-                        Feishu.update_values("xiaoniangao", "ba0da4", "A3:I3", values)
+                        Feishu.update_values("logs", "xiaoniangao", "ba0da4", "A3:I3", values)
 
         except Exception as e:
             Common.logger().error("获取小时榜视频列表异常:{}", e)
 
     # 检查是否有今日的上升榜日期
     @classmethod
-    def check_hour_list_data(cls):
+    def check_hour_list_data(cls, date):
         # 判断J1单元格的日期是否为今天
-        if Feishu.get_range_value("xiaoniangao", "ba0da4", "J1:J1")[0] != cls.today:
+        if Feishu.get_range_value("logs", "xiaoniangao", "ba0da4", "J1:J1")[0] != date:
             # 插入3列 J1:L1,并写入日期和时间数据
-            values = [[cls.today], ["10:00", "15:00", "20:00"]]
-            Feishu.insert_columns("xiaoniangao", "ba0da4", "COLUMNS", 9, 12)
-            Feishu.update_values("xiaoniangao", "ba0da4",  "J1:L2", values)
-            Feishu.merge_cells("xiaoniangao", "ba0da4", "J1:L1")
+            values = [[date], ["10:00", "15:00", "20:00"]]
+            Feishu.insert_columns("logs", "xiaoniangao", "ba0da4", "COLUMNS", 9, 12)
+            Feishu.update_values("logs", "xiaoniangao", "ba0da4",  "J1:L2", values)
+            Feishu.merge_cells("logs", "xiaoniangao", "ba0da4", "J1:L1")
             Common.logger().info("插入今天日期成功")
         else:
             Common.logger().info("今日上升榜日期已存在")
@@ -306,62 +302,69 @@ class HourList:
         :params sheetid:工作表 ID
         :params startindex:从第几行开始清除
         """
-        for i in range(int(startindex), len(Feishu.get_values_batch(crawler, sheetid)) + 1):
+        for i in range(int(startindex), len(Feishu.get_values_batch("logs", crawler, sheetid)) + 1):
             time.sleep(1)
             Common.logger().info("正在检查第:{}行", i)
             # 删除空行
-            if Feishu.get_range_value(crawler, sheetid, "A" + str(i) + ":" + "A" + str(i))[0] is None\
-                    and Feishu.get_range_value(crawler, sheetid, "B" + str(i) + ":" + "B" + str(i))[0] is None\
-                    and Feishu.get_range_value(crawler, sheetid, "C" + str(i) + ":" + "C" + str(i))[0] is None\
-                    and Feishu.get_range_value(crawler, sheetid, "D" + str(i) + ":" + "D" + str(i))[0] is None:
+            if Feishu.get_range_value("logs", crawler, sheetid, "A" + str(i) + ":" + "A" + str(i))[0] is None\
+                    and Feishu.get_range_value("logs", crawler, sheetid, "B" + str(i) + ":" + "B" + str(i))[0] is None\
+                    and Feishu.get_range_value("logs", crawler, sheetid, "C" + str(i) + ":" + "C" + str(i))[0] is None\
+                    and Feishu.get_range_value("logs", crawler, sheetid, "D" + str(i) + ":" + "D" + str(i))[0] is None:
                 Common.logger().info("当前第{}行为空行,删除", i)
-                Feishu.dimension_range(crawler, sheetid, "ROWS", i, i)
+                Feishu.dimension_range("logs", crawler, sheetid, "ROWS", i, i)
         Common.logger().info("删除空行完成")
 
     # 更新小时榜数据
     @classmethod
-    def update_hour_list_data(cls):
+    def update_hour_list_data(cls, today, yesterday, before_yesterday):
         """
         更新小时榜数据
         """
         try:
-            if len(Feishu.get_values_batch("xiaoniangao", "ba0da4")) == 2:
+            if len(Feishu.get_values_batch("logs", "xiaoniangao", "ba0da4")) == 2:
                 Common.logger().info("当前工作表无数据")
             else:
-                for i in range(3, len(Feishu.get_values_batch("xiaoniangao", "ba0da4"))+1):
+                for i in range(3, len(Feishu.get_values_batch("logs", "xiaoniangao", "ba0da4"))+1):
                     time.sleep(1)
                     Common.logger().info("更新第:{}行视频信息", i)
 
                     # 略过空行
-                    if Feishu.get_range_value("xiaoniangao", "ba0da4", "D" + str(i) + ":" + "D" + str(i))[0] is None\
-                            and Feishu.get_range_value("xiaoniangao", "ba0da4", "C"+str(i)+":"+"C"+str(i))[0] is None\
-                            and Feishu.get_range_value("xiaoniangao", "ba0da4", "A"+str(i)+":"+"A"+str(i))[0] is None:
+                    if Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "D" + str(i) + ":" + "D" + str(i))[0] is None\
+                            and Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "C"+str(i)+":"+"C"+str(i))[0] is None\
+                            and Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "A"+str(i)+":"+"A"+str(i))[0] is None:
                         Common.logger().info("空行,略过")
                     else:
                         # 视频标题
-                        v_title = Feishu.get_range_value("xiaoniangao", "ba0da4", "D" + str(i) + ":" + "D" + str(i))[0]
+                        v_title = Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "D" + str(i) + ":" + "D" + str(i))[0]
                         Common.logger().info("视频详情,video_title:{},{}", v_title, type(v_title))
 
                         # 视频 ID
-                        v_id = Feishu.get_range_value("xiaoniangao", "ba0da4", "C" + str(i) + ":" + "C" + str(i))[0]
+                        v_id = Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "C" + str(i) + ":" + "C" + str(i))[0]
                         Common.logger().info("视频详情,video_id:{},{}", v_id, type(v_id))
 
                         # profile_id,用户 ID
-                        p_id = Feishu.get_range_value("xiaoniangao", "ba0da4", "A" + str(i) + ":" + "A" + str(i))[0]
+                        p_id = Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "A" + str(i) + ":" + "A" + str(i))[0]
                         Common.logger().info("视频详情,profile_id:{},{}", p_id, type(p_id))
 
                         # profile_mid
-                        p_mid = Feishu.get_range_value("xiaoniangao", "ba0da4", "B" + str(i) + ":" + "B" + str(i))[0]
+                        p_mid = Feishu.get_range_value(
+                            "logs", "xiaoniangao", "ba0da4", "B" + str(i) + ":" + "B" + str(i))[0]
                         Common.logger().info("视频详情,profile_mid:{},{}", p_mid, type(p_mid))
 
                         # 抓取时的播放量
                         v_play_cnt = Feishu.get_range_value(
-                            "xiaoniangao", "ba0da4", "I" + str(i) + ":" + "I" + str(i))[0]
+                            "logs", "xiaoniangao", "ba0da4", "I" + str(i) + ":" + "I" + str(i))[0]
                         Common.logger().info("视频详情,video_play_cnt:{},{}", v_play_cnt, type(v_play_cnt))
 
                         # 抓取时间
                         v_upload_time = Feishu.get_range_value(
-                            "xiaoniangao", "ba0da4", "H" + str(i) + ":" + "H" + str(i))[0]
+                            "logs", "xiaoniangao", "ba0da4", "H" + str(i) + ":" + "H" + str(i))[0]
                         Common.logger().info("视频详情,video_send_time:{},{}", v_upload_time, type(v_upload_time))
 
                         # 抓取时间:日期
@@ -427,7 +430,7 @@ class HourList:
                             Common.logger().info("视频详情,当前播放量:{}", hour_play_cnt)
                             # 固定时间获取符合规则的视频,写入云文档:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=ba0da4
                             update_hour = datetime.datetime.now()
-                            if upload_data == cls.today and update_hour.hour == 10 and int(upload_hour) <= 10:
+                            if upload_data == today and update_hour.hour == 10 and int(upload_hour) <= 10:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:10点 and 抓取时间<=10点")
 
                                 # 当天 10:00 视频播放量
@@ -438,10 +441,10 @@ class HourList:
                                 values = int(ten_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
                                 Common.logger().info("10:00数据更新成功:{}", values)
 
-                            elif upload_data == cls.today and update_hour.hour == 15 and int(upload_hour) <= 10:
+                            elif upload_data == today and update_hour.hour == 15 and int(upload_hour) <= 10:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:15点 and 抓取时间<=10点")
 
                                 # 当天 15:00 视频播放量
@@ -450,20 +453,20 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "J"+str(i) + ":" + "J"+str(i))[0]
 
                                 # 15:00 的上升榜写入数据
                                 values = int(fifteen_hour_play_cnt) - (int(v_play_cnt) + int(ten_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
-                            elif upload_data == cls.today and update_hour.hour == 15 and 10 < int(upload_hour) <= 15:
+                            elif upload_data == today and update_hour.hour == 15 and 10 < int(upload_hour) <= 15:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:15点 and 10<抓取时间<=15点")
 
                                 # 当天 15:00 视频播放量
@@ -474,10 +477,10 @@ class HourList:
                                 values = int(fifteen_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
-                            elif upload_data == cls.today and update_hour.hour == 20 and int(upload_hour) <= 10:
+                            elif upload_data == today and update_hour.hour == 20 and int(upload_hour) <= 10:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:20点 and 抓取时间<=10点")
 
                                 # 当天 20:00 视频播放量
@@ -486,29 +489,29 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (
                                         int(v_play_cnt) + int(ten_up_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
-                            elif upload_data == cls.today and update_hour.hour == 20 and 10 < int(upload_hour) <= 15:
+                            elif upload_data == today and update_hour.hour == 20 and 10 < int(upload_hour) <= 15:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:20点 and 10<抓取时间<=15点")
 
                                 # 当天 20:00 视频播放量
@@ -517,20 +520,20 @@ class HourList:
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (int(v_play_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
-                            elif upload_data == cls.today and update_hour.hour == 20 and 15 < int(upload_hour) <= 20:
+                            elif upload_data == today and update_hour.hour == 20 and 15 < int(upload_hour) <= 20:
                                 Common.logger().info("满足条件: 抓取日期为今天 and 当前时间:20点 and 15<抓取时间<=20点")
 
                                 # 当天 20:00 视频播放量
@@ -541,10 +544,10 @@ class HourList:
                                 values = int(twenty_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
-                            elif (upload_data == cls.yesterday or upload_data == cls.before_yesterday)\
+                            elif (upload_data == yesterday or upload_data == before_yesterday)\
                                     and update_hour.hour == 10:
                                 Common.logger().info("满足条件: 抓取时间小于今天 and 当前时间:10点")
 
@@ -556,10 +559,10 @@ class HourList:
                                 values = int(ten_hour_play_cnt) - int(v_play_cnt)
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i), [[values]])
                                 Common.logger().info("10:00数据更新成功:{}", values)
 
-                            elif (upload_data == cls.yesterday or upload_data == cls.before_yesterday)\
+                            elif (upload_data == yesterday or upload_data == before_yesterday)\
                                     and update_hour.hour == 15:
                                 Common.logger().info("满足条件: 抓取时间小于今天 and 当前时间:15点")
 
@@ -569,20 +572,20 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
 
                                 # 15:00 的上升榜写入数据
                                 values = int(fifteen_hour_play_cnt) - (int(v_play_cnt) + int(ten_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i), [[values]])
                                 Common.logger().info("15:00数据更新成功:{}", values)
 
-                            elif (upload_data == cls.yesterday or upload_data == cls.before_yesterday)\
+                            elif (upload_data == yesterday or upload_data == before_yesterday)\
                                     and update_hour.hour == 20:
                                 Common.logger().info("满足条件: 抓取时间小于今天 and 当前时间:20点")
 
@@ -592,26 +595,26 @@ class HourList:
 
                                 # 当天 10:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0] is None:
                                     ten_up_cnt = 0
                                 else:
                                     ten_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "J" + str(i) + ":" + "J" + str(i))[0]
 
                                 # 当天 15:00 上升的数据
                                 if Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0] is None:
                                     fifteen_up_cnt = 0
                                 else:
                                     fifteen_up_cnt = Feishu.get_range_value(
-                                        "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
+                                        "logs", "xiaoniangao", "ba0da4", "K" + str(i) + ":" + "K" + str(i))[0]
 
                                 # 20:00 的上升榜写入数据
                                 values = int(twenty_hour_play_cnt) - (
                                         int(v_play_cnt) + int(ten_up_cnt) + int(fifteen_up_cnt))
                                 time.sleep(1)
                                 Feishu.update_values(
-                                    "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
+                                    "logs", "xiaoniangao", "ba0da4", "L" + str(i) + ":" + "L" + str(i), [[values]])
                                 Common.logger().info("20:00数据更新成功:{}", values)
 
                         except Exception as e:
@@ -624,4 +627,4 @@ if __name__ == "__main__":
     hour_list = HourList()
     hour_list.get_hour_list_feeds()
     # hour_list.del_null_rows("xiaoniangao", "ba0da4", 3)
-    hour_list.update_hour_list_data()
+    hour_list.update_hour_list_data(today="", yesterday="", before_yesterday="")

+ 304 - 0
main/person_list.py

@@ -0,0 +1,304 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/5/18
+import time
+import requests
+import urllib3
+from main.common import Common
+from main.feishu_lib import Feishu
+from main.publish import Publish
+
+proxies = {"http": None, "https": None}
+
+
class Person:
    # 关注榜翻页游标:-1 表示从最新一页开始,每次请求后由接口返回的 next_t 更新
    next_t = -1

    # 过滤敏感词
    @classmethod
    def sensitive_words(cls):
        """从云文档敏感词表读取全部敏感词,返回词库列表。"""
        # 表格是二维结构,拉平成一维列表,并跳过空单元格
        lists = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
        return [word for row in lists for word in row if word is not None]

    # 获取用户列表
    @classmethod
    def person_list(cls):
        """读取定向爬取账号表,返回 profile_mid 列表。

        表中只有表头(暂无账号)或读取异常时返回 None,调用方需判空。
        """
        try:
            # 只请求一次云文档,避免在 if 判断和 range 中重复拉取整表
            rows = len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi"))
            if rows == 1:
                Common.person_logger().info("暂无定向爬取账号")
                return None

            person_list = []
            nick_list = []
            # 第 1 行为表头,账号数据从第 2 行开始
            for i in range(2, rows + 1):
                time.sleep(0.5)
                profile_mid = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
                time.sleep(0.5)
                nick = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", "oNpThi", "C" + str(i) + ":" + "C" + str(i))[0]
                nick_list.append(nick)
                person_list.append(profile_mid)

            Common.person_logger().info("已获取用户列表:{}", nick_list)
            return person_list

        except Exception as e:
            Common.person_logger().error("获取用户列表异常:{}", e)
            return None

    # 关注列表中的用户
    @classmethod
    def sub_persons(cls):
        """逐个调用关注接口,关注定向账号表中的全部用户。"""
        profile_mids = cls.person_list()
        # 账号表为空或读取失败时 person_list 返回 None,直接跳过,避免迭代 None 报错
        if not profile_mids:
            return
        for profile_mid in profile_mids:
            url = "https://api.xiaoniangao.cn/V1/account/sub_user"
            headers = {
                "X-Mid": "1164637358",
                "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
                "content-type": "application/json",
                "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
                "Accept-Encoding": "gzip,compress,br,deflate",
                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                              "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
                "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
            }
            data = {
                "visited_mid": int(profile_mid),
                "log_common_params": {
                    "e": [{
                        "data": {
                            "page": "profilePage",
                            "topic": "public",
                            "type": "follow",
                            "name": "user",
                            "smid": str(profile_mid)
                        },
                        "ab": {}
                    }],
                    "ext": {
                        "brand": "iPhone",
                        "device": "iPhone 11",
                        "os": "iOS 14.7.1",
                        "weixinver": "8.0.20",
                        "srcver": "2.24.2",
                        "net": "wifi",
                        "scene": "1089"
                    },
                    "pj": "1",
                    "pf": "2",
                    "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9"
                },
                "token": "451273638af2c8bb90266bcfaf601a68",
                "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
                "proj": "ma",
                "wx_ver": "8.0.20",
                "code_ver": "3.62.0"
            }

            try:
                urllib3.disable_warnings()
                r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
                Common.person_logger().info("关注用户:{},{}", profile_mid, r)
            except Exception as e:
                Common.person_logger().error("关注用户异常:{}", e)

    # 从关注列表获取视频,并下载符合规则的视频,再进行上传
    @classmethod
    def download_from_sub(cls, endtime):
        """拉取关注流一页(最多 5 条),下载并上传其中符合规则的视频。

        :param endtime: 发布时间下限(毫秒时间戳),早于该时间的视频不抓取
        :return: 本页最后一条视频的发布时间(毫秒);本页无数据或请求异常时返回 None
        """
        url = "https://api.xiaoniangao.cn/album/get_user_trends"
        headers = {
            "X-Mid": "1164637358",
            "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
            "content-type": "application/json",
            "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                          "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
            "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
        }
        data = {
            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
            "start_t": int(cls.next_t),
            "limit": 5,
            "share_width": 625,
            "share_height": 500,
            "token": "451273638af2c8bb90266bcfaf601a68",
            "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
            "proj": "ma",
            "wx_ver": "8.0.20",
            "code_ver": "3.62.0",
            "log_common_params": {
                "e": [{
                    "data": {
                        "page": "discoverIndexPage",
                        "topic": "follow"
                    }
                }],
                "ext": {
                    "brand": "iPhone",
                    "device": "iPhone 11",
                    "os": "iOS 14.7.1",
                    "weixinver": "8.0.20",
                    "srcver": "2.24.2",
                    "net": "wifi",
                    "scene": "1089"
                },
                "pj": "1",
                "pf": "2",
                "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
            }
        }
        try:
            urllib3.disable_warnings()
            r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
            # 记录翻页游标,下次请求从本页末尾继续
            cls.next_t = r.json()["data"]["next_t"]
            feeds = r.json()["data"]["list"]
            # 保存本页最后一条视频的发布时间;feeds 为空时保持 None
            video_send_time = None
            for i in range(len(feeds)):
                # 标题:去掉换行及可能破坏文件路径 / 云文档的特殊字符
                video_title = feeds[i]["title"].strip().replace("\n", "") \
                            .replace("/", "").replace("\r", "").replace("#", "") \
                            .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
                            .replace(":", "").replace("*", "").replace("?", "") \
                            .replace("?", "").replace('"', "").replace("<", "") \
                            .replace(">", "").replace("|", "").replace(" ", "")
                Common.person_logger().info("标题:{}", video_title)
                # 用户名
                user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
                    .replace("/", "").replace("快手", "").replace(" ", "") \
                    .replace(" ", "").replace("&NBSP", "").replace("\r", "")
                Common.person_logger().info("用户名:{}", user_name)
                # 视频 ID
                video_id = feeds[i]["vid"]
                Common.person_logger().info("视频ID:{}", video_id)
                # 播放量
                video_play_cnt = feeds[i]["play_pv"]
                Common.person_logger().info("播放量:{}", video_play_cnt)
                # 评论数
                video_comment_cnt = feeds[i]["comment_count"]
                # 点赞
                video_like_cnt = feeds[i]["favor"]["total"]
                # 分享
                video_share_cnt = feeds[i]["share"]
                # 时长:接口返回毫秒,转换为秒
                video_duration = int(feeds[i]["du"] / 1000)
                # 发布时间(毫秒时间戳)
                video_send_time = feeds[i]["t"]
                Common.person_logger().info(
                    "发布时间:{}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
                # 宽和高
                video_width = feeds[i]["w"]
                video_height = feeds[i]["h"]
                # 头像
                head_url = feeds[i]["user"]["hurl"]
                # 用户 ID
                profile_id = feeds[i]["id"]
                # 用户 mid
                profile_mid = feeds[i]["user"]["mid"]
                # 封面
                cover_url = feeds[i]["url"]
                # 视频播放地址
                video_url = feeds[i]["v_url"]
                Common.person_logger().info("播放地址:{}", video_url)

                # 过滤无效视频
                if video_id == "" or video_url == "" or video_send_time == "":
                    Common.person_logger().info("无效视频")
                # 判断发布时间:2022年5月18日以后发布
                elif int(video_send_time) < endtime:
                    Common.person_logger().info(
                        "发布时间:{},在2022年5月18日之前",
                        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
                # 判断视频播放量大于1000
                elif int(video_play_cnt) < 1000:
                    Common.person_logger().info("视频:{},播放量:{}<1000", video_title, video_play_cnt)
                # 过滤敏感词
                elif any(word in video_title for word in cls.sensitive_words()):
                    Common.person_logger().info("视频已中敏感词:{}".format(video_title))
                # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
                elif video_id in [j for i in Feishu.get_values_batch(
                        "person-logs", "xiaoniangao", "yatRv2") for j in i]:
                    Common.person_logger().info("该视频已下载:{}", video_title)
                # 满足抓取规则
                else:
                    Common.person_logger().info("开始下载视频:{}", video_title)
                    # 下载封面
                    Common.download_method(
                        log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
                    # 下载视频
                    Common.download_method(
                        log_path="person-logs", text="video", d_name=video_title, d_url=video_url)
                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
                    # 字段顺序必须与 README 中 info.txt 的约定保持一致
                    with open(r"./videos/" + video_title
                              + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
                        f_a.write(str(video_id) + "\n" +
                                  str(video_title) + "\n" +
                                  str(video_duration) + "\n" +
                                  str(video_play_cnt) + "\n" +
                                  str(video_comment_cnt) + "\n" +
                                  str(video_like_cnt) + "\n" +
                                  str(video_share_cnt) + "\n" +
                                  str(video_width)+"*"+str(video_height) + "\n" +
                                  str(video_send_time) + "\n" +
                                  str(user_name) + "\n" +
                                  str(head_url) + "\n" +
                                  str(video_url) + "\n" +
                                  str(cover_url) + "\n" +
                                  str("xiaoniangao"))
                    Common.person_logger().info("==========视频信息已保存至info.txt==========")

                    # 上传视频
                    Common.person_logger().info("开始上传视频:{}".format(video_title))
                    Publish.upload_and_publish("dev", "play")
                    Common.person_logger().info("视频上传完成:{}", video_title)
                    # 上传完成时间
                    upload_time = int(time.time())

                    # 保存视频信息到云文档
                    Common.person_logger().info("添加视频到云文档:{}", video_title)
                    # 插入空行
                    time.sleep(1)
                    Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
                    # 视频信息写入云文档
                    values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
                               "定向账号爬取",
                               video_id,
                               video_title,
                               video_play_cnt,
                               video_comment_cnt,
                               video_like_cnt,
                               video_share_cnt,
                               video_duration,
                               str(video_width)+"*"+str(video_height),
                               time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000)),
                               user_name,
                               profile_id,
                               profile_mid,
                               head_url,
                               cover_url,
                               video_url]]
                    time.sleep(1)
                    Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)
            # 原实现在循环体内 return,导致每页只处理第一条视频;
            # 这里处理完整页后再返回最后一条视频的发布时间,供调用方判断是否继续翻页
            return None if video_send_time is None else int(video_send_time)
        except Exception as e:
            Common.person_logger().error("请求关注列表异常:{}", e)
            return None
+
+
if __name__ == "__main__":
    # 手动调试入口:为定向账号表中的用户执行关注操作
    Person.sub_persons()

+ 4 - 5
main/publish.py

@@ -131,7 +131,7 @@ class Publish:
         os.rmdir(local_file)
         Common.logger().info("remove local file dir = {} success".format(local_file))
 
-    local_file_path = './videos'
+    local_file_path = '.\\videos'
     video_file = 'video'
     image_file = 'image'
     info_file = 'info'
@@ -139,8 +139,7 @@ class Publish:
     uids_dev_play = [6267141]
     uids_prod_up = [20631208, 20631209, 20631210, 20631211, 20631212,
                     20631213, 20631214, 20631215, 20631216, 20631217]
-    uids_prod_play = [20631228, 20631229, 20631230, 20631231, 20631232,
-                      20631233, 20631234, 20631235, 20631236, 20631237]
+    uids_prod_play = [20631196, 20631197, 20631198, 20631199, 20631200, 20631201]
 
     @classmethod
     def upload_and_publish(cls, env, job):
@@ -178,7 +177,7 @@ class Publish:
                     # 单个视频文件夹下的所有视频文件
                     for fi in dir_files:
                         # 视频文件夹下的所有文件路径
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         Common.logger().info('dir fi_path = {}'.format(fi_path))
                         # 读取 info.txt,赋值给 data
                         if cls.info_file in fi:
@@ -205,7 +204,7 @@ class Publish:
                     # 刷新数据
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
-                        fi_path = fi_d + '/' + fi
+                        fi_path = fi_d + '\\' + fi
                         Common.logger().info('dir fi_path = {}'.format(fi_path))
                         # 上传oss
                         if cls.video_file in fi:

+ 15 - 8
main/run_hour_list.py

@@ -17,32 +17,39 @@ def hour_list_job():
             HourList.get_hour_list_feeds()
             time.sleep(1)
 
+            # 今天的日期:年-月-日
+            today = datetime.datetime.now().strftime("%Y-%m-%d")
+            # 昨天
+            yesterday = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
+            # 前天
+            before_yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
+
             hour_list_job_time = datetime.datetime.now()
-            if hour_list_job_time.hour == 10 and hour_list_job_time.minute <= 10:
+            if hour_list_job_time.hour == 10 and 0 <= hour_list_job_time.minute <= 10:
 
                 Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data()
+                HourList.check_hour_list_data(today)
 
                 Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data()
+                HourList.update_hour_list_data(today, yesterday, before_yesterday)
 
             elif hour_list_job_time.hour == 15 and hour_list_job_time.minute <= 10:
 
                 Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data()
+                HourList.check_hour_list_data(today)
 
                 Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data()
+                HourList.update_hour_list_data(today, yesterday, before_yesterday)
 
             elif hour_list_job_time.hour == 20 and hour_list_job_time.minute <= 10:
 
                 Common.logger().info("检查今日上升榜日期是否存在")
-                HourList.check_hour_list_data()
+                HourList.check_hour_list_data(today)
 
                 Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data()
+                HourList.update_hour_list_data(today, yesterday, before_yesterday)
 
-            elif hour_list_job_time.hour == 23 and hour_list_job_time.minute >= 55:
+            elif hour_list_job_time.hour == 23 and hour_list_job_time.minute >= 50:
 
                 break
 

+ 35 - 0
main/run_person.py

@@ -1,3 +1,38 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/5/18
+import datetime
+import os
+import sys
+import time
+
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.person_list import Person
+
+
def person_list_job():
    """小年糕关注榜抓取任务:先关注定向账号,再循环翻页下载、上传视频。"""
    while True:
        Common.person_logger().info("开始抓取小年糕关注榜")
        # 关注定向账号表中的用户
        Person.sub_persons()
        while True:
            # 任务时间下限:只抓取 2022年5月18日 之后发布的视频(毫秒时间戳)
            endtime = 1652803200000
            person_list_time = datetime.datetime.now()

            # 拉取并处理一页关注流,只调用一次:
            # 原实现在 if 条件和分支体内各调用一次,每轮会多翻一页并重复下载;
            # 返回值为本页最后一条视频的发布时间,无数据或请求异常时为 None,
            # 直接与 endtime 比较会抛 TypeError,这里先判空
            send_time = Person.download_from_sub(endtime)
            if send_time is not None and send_time >= endtime:
                # 仍在时间窗口内,继续翻页(翻页游标 next_t 已在 Person 内更新)
                continue
            elif person_list_time.hour == 23 and person_list_time.minute >= 50:
                Common.person_logger().info("结束今日抓取任务")
                Person.next_t = -1
                break
            else:
                # 走到这里说明本页视频发布时间已早于下限(或本页无数据)
                Common.person_logger().info("发布时间小于2022年5月18日,结束抓取任务")
                time.sleep(3600)
                Person.next_t = -1
                break


if __name__ == "__main__":
    person_list_job()