فهرست منبع

已下载视频表:增加站内视频链接;已下载视频信息同步至监控表

wangkun 2 سال پیش
والد
کامیت
40e1f35151
12 فایل‌های تغییر یافته به همراه 665 افزوده شده و 570 حذف شده
  1. 85 5
      README.md
  2. 29 80
      main/common.py
  3. 72 0
      main/dellogo_publish.py
  4. 1 1
      main/demo.py
  5. 44 111
      main/feishu_lib.py
  6. 267 245
      main/hour_list.py
  7. 95 63
      main/person_list.py
  8. 48 35
      main/publish.py
  9. 16 16
      main/run_hour_list.py
  10. 8 7
      main/run_person_list.py
  11. 0 3
      person-logs/__init__.py
  12. 0 4
      requirements.txt

+ 85 - 5
README.md

@@ -1,25 +1,105 @@
 小年糕爬虫
 
-7 天内,播放量>=5000
-时长 1-10min
+python==3.10
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9
 
-每小时新增播放数据
+执行入口:
+
+1.cd ./crawler-xiaoniangao
+
+2.python3 ./main/run_xx.py
+
+
+
+==========2022/6/29===========
+
+已下载视频表:预留前 5 列备用
+
+已下载视频表增加列:站内视频链接
+
+已下载视频,同步信息至监控表:https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?sheet=6fed97
+
+
+
+==========2022/6/17==========
+
+小时榜抓取基础规则:
+
+1.时长>=40s
+
+2.发布时间<=10天
+
+3.播放量>=4000
+
+4.过滤无效视频(标题、ID、时长等为空)
+
+5.过滤敏感词库
+
+6.从已下载表中去重
+
+7.从小时榜表中去重
+
+小时榜更新规则:
+
+1.抓取时间<=2天
+
+2.每天 10、15、20 点更新播放量的差值(上升榜中写入的数据:当前播放量 - 上个时间段的播放量)
+
+小时榜下载规则:
+
+1.抓取时间<=3天
+
+2.视频时长>=40s
+
+3.任意时间段播放量上升>=5000 或 连续两个时间段播放量上升>=2000
+
+
+关注榜抓取基础规则:
+
+1.时长>=40s
+
+2.发布时间<=2天
+
+3.播放量>=5000
+
+关注榜下载规则:
+
+1.过滤无效视频(标题、ID、时长等为空)
+
+2.过滤敏感词库
+
+3.从已下载表中去重
 
-爬取时间要小于上升榜时间
-上升榜中写入的数据:当前播放量 - 上个时间段的播放量
 
 上传视频时,info.txt文件中的视频信息包含:
+
 str(download_video_id)
+
 str(download_video_title)
+
 str(download_video_duration)
+
 str(download_video_play_cnt)
+
 str(download_video_comment_cnt)
+
 str(download_video_like_cnt)
+
 str(download_video_share_cnt)
+
 str(download_video_resolution)
+
 str(download_video_send_time)
+
 str(download_user_name)
+
 str(download_head_url)
+
 str(download_video_url)
+
 str(download_cover_url)
+
 str(download_video_session)

+ 29 - 80
main/common.py

@@ -27,7 +27,7 @@ class Common:
 
     # 使用 logger 模块生成日志
     @staticmethod
-    def logger():
+    def logger(log_type):
         """
         使用 logger 模块生成日志
         """
@@ -38,7 +38,12 @@ class Common:
             os.makedirs(log_path)
 
         # 日志文件名
-        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
+        if log_type == "hour":
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-xiaoniangao-hour.log'
+        elif log_type == "person":
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-xiaoniangao-person.log'
+        else:
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-xiaoniangao.log'
 
         # 日志不打印到控制台
         logger.remove(handler_id=None)
@@ -52,76 +57,32 @@ class Common:
 
         return logger
 
-    # 使用 logger 模块生成日志
-    @staticmethod
-    def person_logger():
-        """
-        使用 logger 模块生成日志
-        """
-        # 日志路径
-        log_dir = "./person-logs/"
-        log_path = os.getcwd() + os.sep + log_dir
-        if not os.path.isdir(log_path):
-            os.makedirs(log_path)
-
-        # 日志文件名
-        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
-
-        # 日志不打印到控制台
-        logger.remove(handler_id=None)
-
-        # rotation="500 MB",实现每 500MB 存储一个文件
-        # rotation="12:00",实现每天 12:00 创建一个文件
-        # rotation="1 week",每周创建一个文件
-        # retention="10 days",每隔10天之后就会清理旧的日志
-        # 初始化日志
-        logger.add(log_dir + log_name, level="INFO", rotation='00:00')
-
-        return logger
-
-    # 清除日志,保留最近 7 个文件
+    # 清除日志,保留最近 6 个文件
     @classmethod
-    def del_logs(cls, d_dir):
+    def del_logs(cls, log_type):
         """
         清除冗余日志文件
         :d_dir: 需要删除的 log 地址
-        :return: 保留最近 7 个日志
+        :return: 保留最近 6 个日志
         """
-        if d_dir == "logs":
-            logs_dir = "./logs/"
-            all_files = sorted(os.listdir(logs_dir))
-            all_logs = []
-            for log in all_files:
-                name = os.path.splitext(log)[-1]
-                if name == ".log":
-                    all_logs.append(log)
-
-            if len(all_logs) <= 7:
-                pass
-            else:
-                for file in all_logs[:len(all_logs) - 7]:
-                    os.remove(logs_dir + file)
-            cls.logger().info("清除冗余日志成功")
-
-        elif d_dir == "person-logs":
-            logs_dir = "./person-logs/"
-            all_files = sorted(os.listdir(logs_dir))
-            all_logs = []
-            for log in all_files:
-                name = os.path.splitext(log)[-1]
-                if name == ".log":
-                    all_logs.append(log)
-
-            if len(all_logs) <= 7:
-                pass
-            else:
-                for file in all_logs[:len(all_logs) - 7]:
-                    os.remove(logs_dir + file)
-            cls.person_logger().info("清除冗余日志成功")
+        logs_dir = "./logs/"
+        all_files = sorted(os.listdir(logs_dir))
+        all_logs = []
+        for log in all_files:
+            name = os.path.splitext(log)[-1]
+            if name == ".log":
+                all_logs.append(log)
+
+        if len(all_logs) <= 6:
+            pass
+        else:
+            for file in all_logs[:len(all_logs) - 6]:
+                os.remove(logs_dir + file)
+        cls.logger(log_type).info("清除冗余日志成功")
 
     # 封装下载视频或封面的方法
     @classmethod
-    def download_method(cls, log_path, text, d_name, d_url):
+    def download_method(cls, log_type, text, d_name, d_url):
         """
         下载封面:text == "cover" ; 下载视频:text == "video"
         需要下载的视频标题:d_title
@@ -150,15 +111,9 @@ class Common:
                 with open(video_dir + video_name, "wb") as f:
                     for chunk in response.iter_content(chunk_size=10240):
                         f.write(chunk)
-                if log_path == "logs":
-                    cls.logger().info("==========视频下载完成==========")
-                elif log_path == "person-logs":
-                    cls.person_logger().info("==========视频下载完成==========")
+                cls.logger(log_type).info("==========视频下载完成==========")
             except Exception as e:
-                if log_path == "logs":
-                    cls.logger().exception("视频下载失败:{}", e)
-                elif log_path == "person-logs":
-                    cls.person_logger().exception("视频下载失败:{}", e)
+                cls.logger(log_type).exception("视频下载失败:{}", e)
 
         # 下载封面
         elif text == "cover":
@@ -175,15 +130,9 @@ class Common:
             try:
                 with open(video_dir + cover_name, "wb") as f:
                     f.write(response.content)
-                if log_path == "logs":
-                    cls.logger().info("==========封面下载完成==========")
-                elif log_path == "person-logs":
-                    cls.person_logger().info("==========封面下载完成==========")
+                cls.logger(log_type).info("==========封面下载完成==========")
             except Exception as e:
-                if log_path == "logs":
-                    cls.logger().exception("封面下载失败:{}", e)
-                elif log_path == "person-logs":
-                    cls.person_logger().exception("封面下载失败:{}", e)
+                cls.logger(log_type).exception("封面下载失败:{}", e)
 
 
 if __name__ == "__main__":

+ 72 - 0
main/dellogo_publish.py

@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/6/17
+"""
+去水印功能
+"""
+import sys
+
+from typing import List
+
+from alibabacloud_mts20140618.client import Client as Mts20140618Client
+from alibabacloud_tea_openapi import models as open_api_models
+from alibabacloud_mts20140618 import models as mts_20140618_models
+from alibabacloud_tea_util import models as util_models
+# from alibabacloud_tea_util.client import Client as UtilClient
+
+from main.common import Common
+
+
+class DelLogo:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def create_client(
+            access_key_id: str,
+            access_key_secret: str,
+    ) -> Mts20140618Client:
+        """
+        使用AK&SK初始化账号Client
+        @param access_key_id:
+        @param access_key_secret:
+        @return: Client
+        @throws Exception
+        """
+        config = open_api_models.Config(
+            # 您的 AccessKey ID,
+            access_key_id=access_key_id,
+            # 您的 AccessKey Secret,
+            access_key_secret=access_key_secret
+        )
+        # 访问的域名
+        config.endpoint = f'mts.cn-hangzhou.aliyuncs.com'
+        return Mts20140618Client(config)
+
+    @staticmethod
+    def main(log_type, args: List[str], ) -> None:
+        client = DelLogo.create_client('accessKeyId', 'accessKeySecret')
+        submit_iproduction_job_request = mts_20140618_models.SubmitIProductionJobRequest()
+        runtime = util_models.RuntimeOptions()
+        try:
+            # 复制代码运行请自行打印 API 的返回值
+            client.submit_iproduction_job_with_options(submit_iproduction_job_request, runtime)
+        except Exception as error:
+            # 如有需要,请打印 error
+            Common.logger(log_type).error(error)
+
+    @staticmethod
+    async def main_async(log_type, args: List[str], ) -> None:
+        client = DelLogo.create_client('accessKeyId', 'accessKeySecret')
+        submit_iproduction_job_request = mts_20140618_models.SubmitIProductionJobRequest()
+        runtime = util_models.RuntimeOptions()
+        try:
+            # 复制代码运行请自行打印 API 的返回值
+            await client.submit_iproduction_job_with_options_async(submit_iproduction_job_request, runtime)
+        except Exception as error:
+            # 如有需要,请打印 error
+            Common.logger(log_type).error(error)
+
+
+if __name__ == '__main__':
+    DelLogo.main("hour", sys.argv[1:])

+ 1 - 1
main/demo.py

@@ -95,5 +95,5 @@ class Demo:
 
 if __name__ == "__main__":
     demo = Demo()
-    demo.video_detail_info("44014961", "1160417280", "4556718896")
+    demo.video_detail_info("44504014", "1105334217", "4433151749")
     # demo.times()

+ 44 - 111
main/feishu_lib.py

@@ -23,6 +23,8 @@ class Feishu:
     weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
     # 小年糕爬虫数据表
     xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
+    # 数据监控表
+    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
 
     # 飞书路径token
     @classmethod
@@ -38,10 +40,12 @@ class Feishu:
             return "shtcn5YSWg91JfVGzj0SFZIRRPh"
         elif crawler == "xiaoniangao":
             return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+        elif crawler == "monitor":
+            return "shtcnlZWYazInhf7Z60jkbLRJyd"
 
     # 获取飞书api token
     @classmethod
-    def get_token(cls, log_path):
+    def get_token(cls, log_type):
         """
         获取飞书api token
         :return:
@@ -56,14 +60,11 @@ class Feishu:
             tenant_access_token = response.json()["tenant_access_token"]
             return tenant_access_token
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("获取飞书 api token 异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("获取飞书 api token 异常:{}", e)
+            Common.logger(log_type).error("获取飞书 api token 异常:{}", e)
 
     # 获取表格元数据
     @classmethod
-    def get_metainfo(cls, log_path, crawler):
+    def get_metainfo(cls, log_type, crawler):
         """
         获取表格元数据
         :return:
@@ -72,7 +73,7 @@ class Feishu:
                            + cls.spreadsheettoken(crawler) + "/metainfo"
 
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -85,17 +86,14 @@ class Feishu:
             response = json.loads(r.content.decode("utf8"))
             return response
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("获取表格元数据异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("获取表格元数据异常:{}", e)
+            Common.logger(log_type).error("获取表格元数据异常:{}", e)
 
     # 读取工作表中所有数据
     @classmethod
-    def get_values_batch(cls, log_path, crawler, sheetid):
+    def get_values_batch(cls, log_type, crawler, sheetid):
         """
         读取工作表中所有数据
-        :param log_path: 启用哪个 log
+        :param log_type: 启用哪个 log
         :param crawler: 哪个爬虫
         :param sheetid: 哪张表
         :return: 所有数据
@@ -103,7 +101,7 @@ class Feishu:
         get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                + cls.spreadsheettoken(crawler) + "/values_batch_get"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -129,18 +127,15 @@ class Feishu:
             values = response["data"]["valueRanges"][0]["values"]
             return values
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("读取工作表所有数据异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("读取工作表所有数据异常:{}", e)
+            Common.logger(log_type).error("读取工作表所有数据异常:{}", e)
 
     # 工作表,插入行或列
     @classmethod
-    def insert_columns(cls, log_path, crawler, sheetid, majordimension, startindex, endindex):
+    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
         """
         工作表插入行或列
-        :param log_path: 日志路径
-        :param crawler: 哪个爬虫
+        :param log_type: 日志类型
+        :param crawler: 哪个爬虫的云文档
         :param sheetid:哪张工作表
         :param majordimension:行或者列
         :param startindex:开始位置
@@ -149,7 +144,7 @@ class Feishu:
         insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -164,23 +159,17 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
-            if log_path == "logs":
-                Common.logger().info("插入行或列:{}", r.json()["msg"])
-            elif log_path == "person-logs":
-                Common.person_logger().info("插入行或列:{}", r.json()["msg"])
+            Common.logger(log_type).info("插入行或列:{}", r.json()["msg"])
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("插入行或列异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("插入行或列异常:{}", e)
+            Common.logger(log_type).error("插入行或列异常:{}", e)
 
     # 写入数据
     @classmethod
-    def update_values(cls, log_path, crawler, sheetid, ranges, values):
+    def update_values(cls, log_type, crawler, sheetid, ranges, values):
         """
         写入数据
-        :param log_path: 日志路径
-        :param crawler: 哪个爬虫
+        :param log_type: 日志类型
+        :param crawler: 哪个爬虫的云文档
         :param sheetid:哪张工作表
         :param ranges:单元格范围
         :param values:写入的具体数据,list
@@ -188,7 +177,7 @@ class Feishu:
         update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                             + cls.spreadsheettoken(crawler) + "/values_batch_update"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -203,22 +192,16 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
-            if log_path == "logs":
-                Common.logger().info("写入数据:{}", r.json()["msg"])
-            elif log_path == "person-logs":
-                Common.person_logger().info("写入数据:{}", r.json()["msg"])
+            Common.logger(log_type).info("写入数据:{}", r.json()["msg"])
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("写入数据异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("写入数据异常:{}", e)
+            Common.logger(log_type).error("写入数据异常:{}", e)
 
     # 合并单元格
     @classmethod
-    def merge_cells(cls, log_path, crawler, sheetid, ranges):
+    def merge_cells(cls, log_type, crawler, sheetid, ranges):
         """
         合并单元格
-        :param log_path: 日志路径
+        :param log_type: 日志类型
         :param crawler: 哪个爬虫
         :param sheetid:哪张工作表
         :param ranges:需要合并的单元格范围
@@ -226,7 +209,7 @@ class Feishu:
         merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                           + cls.spreadsheettoken(crawler) + "/merge_cells"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
 
@@ -238,22 +221,16 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
-            if log_path == "logs":
-                Common.logger().info("合并单元格:{}", r.json()["msg"])
-            elif log_path == "person-logs":
-                Common.person_logger().info("合并单元格:{}", r.json()["msg"])
+            Common.logger(log_type).info("合并单元格:{}", r.json()["msg"])
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("合并单元格异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("合并单元格异常:{}", e)
+            Common.logger(log_type).error("合并单元格异常:{}", e)
 
     # 读取单元格数据
     @classmethod
-    def get_range_value(cls, log_path, crawler, sheetid, cell):
+    def get_range_value(cls, log_type, crawler, sheetid, cell):
         """
         读取单元格内容
-        :param log_path: 日志路径
+        :param log_type: 日志类型
         :param crawler: 哪个爬虫
         :param sheetid: 哪张工作表
         :param cell: 哪个单元格
@@ -262,7 +239,7 @@ class Feishu:
         get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -283,17 +260,14 @@ class Feishu:
             r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
             return r.json()["data"]["valueRange"]["values"][0]
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("读取单元格数据异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("读取单元格数据异常:{}", e)
+            Common.logger(log_type).error("读取单元格数据异常:{}", e)
 
     # 删除行或列,可选 ROWS、COLUMNS
     @classmethod
-    def dimension_range(cls, log_path, crawler, sheetid, major_dimension, startindex, endindex):
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
         """
         删除行或列
-        :param log_path: 日志路径
+        :param log_type: 日志类型
         :param crawler: 哪个爬虫
         :param sheetid:工作表
         :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
@@ -304,7 +278,7 @@ class Feishu:
         dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(log_path),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -318,59 +292,18 @@ class Feishu:
         try:
             urllib3.disable_warnings()
             r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
-            if log_path == "logs":
-                Common.logger().info("删除视频数据:{}", r.json()["msg"])
-            elif log_path == "person-logs":
-                Common.person_logger().info("删除视频数据:{}", r.json()["msg"])
+            Common.logger(log_type).info("删除视频数据:{}", r.json()["msg"])
         except Exception as e:
-            if log_path == "logs":
-                Common.logger().error("删除视频数据异常:{}", e)
-            elif log_path == "person-logs":
-                Common.person_logger().error("删除视频数据异常:{}", e)
+            Common.logger(log_type).error("删除视频数据异常:{}", e)
 
 
 if __name__ == "__main__":
     feishu = Feishu()
 
-    print(feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "B3:B3")[0])
-    print(feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "B4:B4")[0])
-    print(feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"])
-    print(feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "B6:B6")[0])
-    print(feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "B7:B7")[0])
-
-    # # 获取飞书api token
-    # feishu.get_token()
-    # 获取表格元数据
-    # feishu.get_metainfo()
-
-    # 读取工作表中所有数据
-    # print(feishu.get_values_batch("xiaoniangao", "ba0da4"))
-    # print(len(feishu.get_values_batch("k2rKkv")))
-    # for i in range(3, len(feishu.get_values_batch("k2rKkv"))+1):
-    #     print(feishu.get_range_value("k2rKkv", "A" + str(i) + ":" + "A" + str(i))[0])
-    #     print(feishu.update_hour_list_values("k2rKkv", "G" + str(i) + ":" + "H" + str(i), [["333"]]))
-    #     time.sleep(0.5)
-    # feishu.get_range_value("k2rKkv", "F3:F3")[0]
-    # print(type(feishu.get_range_value("k2rKkv", "H19:H19")[0].split(" ")[-1].split(":")[0]))
-    # print(feishu.get_range_value("k2rKkv", "H19:H19")[0])
-    # print(int(feishu.get_range_value("xiaoniangao", "ba0da4", "G6:G6")[0].split(" ")[-1].split(":")[0]))
-    # print(feishu.get_range_value("xiaoniangao", "ba0da4", "G6:G6")[0].split(" ")[0])
-
-    # feishu.update_values("logs", "xiaoniangao", "ba0da4", "H4:H4", [["2022-05-18 21:14:27"]])
-
-    # 看一看+工作表,插入首行
-    # print(feishu.insert_columns("k2rKkv", "COLUMNS", 6, 9))
-    # print(feishu.update_hour_list_values("k2rKkv"))
-    # print(feishu.merge_cells("k2rKkv", "G1:I1"))
-    #
-    # # 看一看+工作表,首行写入数据
-    # print(feishu.update_values("Y8N3Vl", "a1", "b1", "c1", "d1", "e1", "f1", "g1",
-    #                            "h1", "i1", "j1", "k1", "l1", "m1", "n1", "o1"))
-
-    # 查询单元格内容
-    # print(type(feishu.get_range_value("k2rKkv", "G1:G1")[0]))
-    #
-    # 删除行或列,可选 ROWS、COLUMNS
-    # feishu.dimension_range("k2rKkv", "ROWS", 3, 3)
+    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B3:B3")[0])
+    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B4:B4")[0])
+    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"])
+    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B6:B6")[0])
+    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B7:B7")[0])
 
     pass

تفاوت فایلی نمایش داده نمی‌شود زیرا این فایل بسیار بزرگ است
+ 267 - 245
main/hour_list.py


+ 95 - 63
main/person_list.py

@@ -15,19 +15,19 @@ class Person:
     # 翻页初始值
     next_t_list = [-1]
 
-    person_x_b3_traceid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C3:C3")[0]
-    person_x_token_id = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C4:C4")[0]
-    person_referer = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"]
-    person_uid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C6:C6")[0]
-    person_token = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C7:C7")[0]
+    person_x_b3_traceid = Feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C3:C3")[0]
+    person_x_token_id = Feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C4:C4")[0]
+    person_referer = Feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"]
+    person_uid = Feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C6:C6")[0]
+    person_token = Feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C7:C7")[0]
 
     # 过滤敏感词
     @classmethod
-    def sensitive_words(cls):
+    def sensitive_words(cls, log_type):
         # 敏感词库列表
         word_list = []
         # 从云文档读取所有敏感词,添加到词库列表
-        lists = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
+        lists = Feishu.get_values_batch(log_type, "xiaoniangao", "DRAnZh")
         for i in lists:
             for j in i:
                 # 过滤空的单元格内容
@@ -52,7 +52,7 @@ class Person:
         :return: 满足规则,返回 True;反之,返回 False
         """
         # 视频时长
-        if 600 >= int(float(d_duration)) >= 60:
+        if int(float(d_duration)) >= 40:
             # 宽或高
             if int(d_width) >= 0 or int(d_height) >= 0:
                 # 播放量
@@ -77,60 +77,60 @@ class Person:
 
     # 获取关注用户列表
     @classmethod
-    def follow_person_list(cls):
+    def follow_person_list(cls, log_type):
         try:
-            if len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) == 1:
-                Common.person_logger().info("暂无定向爬取账号")
+            if len(Feishu.get_values_batch(log_type, "xiaoniangao", "oNpThi")) == 1:
+                Common.logger(log_type).info("暂无定向爬取账号")
             else:
                 person_list = []
                 nick_list = []
-                for i in range(2, len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) + 1):
+                for i in range(2, len(Feishu.get_values_batch(log_type, "xiaoniangao", "oNpThi")) + 1):
                     time.sleep(0.5)
                     profile_mid = Feishu.get_range_value(
-                        "person-logs", "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
+                        log_type, "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
                     time.sleep(0.5)
                     nick = \
-                        Feishu.get_range_value("person-logs", "xiaoniangao", "oNpThi",
+                        Feishu.get_range_value(log_type, "xiaoniangao", "oNpThi",
                                                "C" + str(i) + ":" + "C" + str(i))[0]
                     nick_list.append(nick)
                     person_list.append(profile_mid)
 
-                Common.person_logger().info("已获取用户列表:{}", nick_list)
+                Common.logger(log_type).info("已获取用户列表:{}", nick_list)
                 return person_list
 
         except Exception as e:
-            Common.person_logger().error("获取用户列表异常:{}", e)
+            Common.logger(log_type).error("获取用户列表异常:{}", e)
 
     # 获取取消关注用户列表
     @classmethod
-    def unfollow_person_list(cls):
+    def unfollow_person_list(cls, log_type):
         try:
-            if len(Feishu.get_values_batch("person-logs", "xiaoniangao", "tuMNhn")) == 1:
-                Common.person_logger().info("暂无定向账号")
+            if len(Feishu.get_values_batch(log_type, "xiaoniangao", "tuMNhn")) == 1:
+                Common.logger(log_type).info("暂无定向账号")
             else:
                 unfollow_person_list = []
                 nick_list = []
-                for i in range(2, len(Feishu.get_values_batch("person-logs", "xiaoniangao", "tuMNhn")) + 1):
+                for i in range(2, len(Feishu.get_values_batch(log_type, "xiaoniangao", "tuMNhn")) + 1):
                     time.sleep(0.5)
                     profile_mid = Feishu.get_range_value(
-                        "person-logs", "xiaoniangao", "tuMNhn", "B" + str(i) + ":" + "B" + str(i))[0]
+                        log_type, "xiaoniangao", "tuMNhn", "B" + str(i) + ":" + "B" + str(i))[0]
                     time.sleep(0.5)
                     nick = \
-                        Feishu.get_range_value("person-logs", "xiaoniangao", "tuMNhn",
+                        Feishu.get_range_value(log_type, "xiaoniangao", "tuMNhn",
                                                "C" + str(i) + ":" + "C" + str(i))[0]
                     nick_list.append(nick)
                     unfollow_person_list.append(profile_mid)
 
-                Common.person_logger().info("取消关注用户列表:{}", nick_list)
+                Common.logger(log_type).info("取消关注用户列表:{}", nick_list)
                 return unfollow_person_list
 
         except Exception as e:
-            Common.person_logger().error("获取用户列表异常:{}", e)
+            Common.logger(log_type).error("获取用户列表异常:{}", e)
 
     # 关注列表中的用户
     @classmethod
-    def sub_persons(cls):
-        profile_mids = cls.follow_person_list()
+    def sub_persons(cls, log_type):
+        profile_mids = cls.follow_person_list(log_type)
         for profile_mid in profile_mids:
             url = "https://api.xiaoniangao.cn/V1/account/sub_user"
             headers = {
@@ -176,18 +176,17 @@ class Person:
                 "wx_ver": "8.0.20",
                 "code_ver": "3.62.0"
             }
-
             try:
                 urllib3.disable_warnings()
                 r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-                Common.person_logger().info("关注用户:{},{}", profile_mid, r)
+                Common.logger(log_type).info("关注用户:{},{}", profile_mid, r)
             except Exception as e:
-                Common.person_logger().error("关注用户异常:{}", e)
+                Common.logger(log_type).error("关注用户异常:{}", e)
 
     # 取消关注
     @classmethod
-    def unsub_persons(cls):
-        unsub_profile_mids = cls.unfollow_person_list()
+    def unsub_persons(cls, log_type):
+        unsub_profile_mids = cls.unfollow_person_list(log_type)
         for profile_mid in unsub_profile_mids:
             url = "https://api.xiaoniangao.cn/V1/account/unsub_user"
             headers = {
@@ -232,17 +231,16 @@ class Person:
                 "proj": "ma",
                 "wx_ver": "8.0.20",
                 "code_ver": "3.64.1"}
-
             try:
                 urllib3.disable_warnings()
                 r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-                Common.person_logger().info("取消关注:{},{}", profile_mid, r)
+                Common.logger(log_type).info("取消关注:{},{}", profile_mid, r)
             except Exception as e:
-                Common.person_logger().error("取消关注异常:{}", e)
+                Common.logger(log_type).error("取消关注异常:{}", e)
 
     # 从关注列表获取视频,并下载符合规则的视频,再进行上传
     @classmethod
-    def download_from_sub(cls, endtime):
+    def download_from_sub(cls, log_type, endtime):
         url = "https://api.xiaoniangao.cn/album/get_user_trends"
         headers = {
             "x-b3-traceid": cls.person_x_b3_traceid,
@@ -302,18 +300,18 @@ class Person:
                     .replace(":", "").replace("*", "").replace("?", "") \
                     .replace("?", "").replace('"', "").replace("<", "") \
                     .replace(">", "").replace("|", "").replace(" ", "")
-                Common.person_logger().info("标题:{}", video_title)
+
                 # 用户名
                 user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
                     .replace("/", "").replace("快手", "").replace(" ", "") \
                     .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                Common.person_logger().info("用户名:{}", user_name)
+
                 # 视频 ID
                 video_id = feeds[i]["vid"]
-                Common.person_logger().info("视频ID:{}", video_id)
+
                 # 播放量
                 video_play_cnt = feeds[i]["play_pv"]
-                Common.person_logger().info("播放量:{}", video_play_cnt)
+
                 # 评论数
                 video_comment_cnt = feeds[i]["comment_count"]
                 # 点赞
@@ -324,8 +322,7 @@ class Person:
                 video_duration = int(feeds[i]["du"] / 1000)
                 # 发布时间
                 video_send_time = feeds[i]["t"]
-                Common.person_logger().info(
-                    "发布时间:{}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
+
                 # 宽和高
                 video_width = feeds[i]["w"]
                 video_height = feeds[i]["h"]
@@ -339,32 +336,39 @@ class Person:
                 cover_url = feeds[i]["url"]
                 # 视频播放地址
                 video_url = feeds[i]["v_url"]
-                Common.person_logger().info("播放地址:{}", video_url)
+
+                Common.logger(log_type).info("标题:{}", video_title)
+                Common.logger(log_type).info("视频ID:{}", video_id)
+                Common.logger(log_type).info("用户名:{}", user_name)
+                Common.logger(log_type).info("播放量:{}", video_play_cnt)
+                Common.logger(log_type).info(
+                    "发布时间:{}", time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
+                Common.logger(log_type).info("播放地址:{}", video_url)
 
                 # 过滤无效视频
                 if video_id == "" or video_url == "" or video_send_time == "":
-                    Common.person_logger().info("无效视频")
+                    Common.logger(log_type).info("无效视频")
                 elif int(video_send_time) < endtime:
-                    Common.person_logger().info("发布时间超过 48 小时")
+                    Common.logger(log_type).info("发布时间超过 48 小时")
                 elif cls.download_rule(
                         video_duration, video_width, video_height, video_play_cnt,
                         video_like_cnt, video_share_cnt, video_send_time) is False:
-                    Common.person_logger().info("不满足基础门槛规则")
+                    Common.logger(log_type).info("不满足基础门槛规则")
                 # 过滤敏感词
-                elif any(word if word in video_title else False for word in cls.sensitive_words()) is True:
-                    Common.person_logger().info("视频已中敏感词:{}".format(video_title))
+                elif any(word if word in video_title else False for word in cls.sensitive_words(log_type)) is True:
+                    Common.logger(log_type).info("视频已中敏感词:{}".format(video_title))
                 # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
-                elif video_id in [j for i in Feishu.get_values_batch("person-logs", "xiaoniangao", "yatRv2") for j in i]:
-                    Common.person_logger().info("该视频已下载:{}", video_title)
+                elif video_id in [j for i in Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2") for j in i]:
+                    Common.logger(log_type).info("该视频已下载:{}", video_title)
                 # 满足抓取规则
                 else:
-                    Common.person_logger().info("开始下载视频:{}", video_title)
+                    Common.logger(log_type).info("开始下载视频:{}", video_title)
                     # 下载封面
                     Common.download_method(
-                        log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
+                        log_type=log_type, text="cover", d_name=video_title, d_url=cover_url)
                     # 下载视频
                     Common.download_method(
-                        log_path="person-logs", text="video", d_name=video_title, d_url=video_url)
+                        log_type=log_type, text="video", d_name=video_title, d_url=video_url)
                     # 保存视频信息至 "./videos/{download_video_title}/info.txt"
                     with open(r"./videos/" + video_title
                               + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
@@ -382,32 +386,36 @@ class Person:
                                   str(video_url) + "\n" +
                                   str(cover_url) + "\n" +
                                   str("xiaoniangao"))
-                    Common.person_logger().info("==========视频信息已保存至info.txt==========")
+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
 
                     # 上传视频
-                    Common.person_logger().info("开始上传视频:{}".format(video_title))
-                    Publish.upload_and_publish("prod", "play")
-                    Common.person_logger().info("视频上传完成:{}", video_title)
+                    Common.logger(log_type).info("开始上传视频:{}".format(video_title))
+                    our_video_id = Publish.upload_and_publish(log_type, "prod", "play")
+                    # Common.logger(log_type).info("our_video_id:{}", our_video_id)
+                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
+                    # Common.logger(log_type).info("our_video_link:{}", our_video_link)
+                    Common.logger(log_type).info("视频上传完成:{}", video_title)
                     # 上传完成时间
                     upload_time = int(time.time())
 
                     # 保存视频信息到云文档
-                    Common.person_logger().info("添加视频到云文档:{}", video_title)
+                    Common.logger(log_type).info("添加视频到云文档:{}", video_title)
                     # 插入空行
                     time.sleep(1)
-                    Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
+                    Feishu.insert_columns(log_type, "xiaoniangao", "yatRv2", "ROWS", 1, 2)
                     # 视频信息写入云文档
-                    values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
                                "定向账号爬取",
                                video_id,
                                video_title,
+                               our_video_link,
                                video_play_cnt,
                                video_comment_cnt,
                                video_like_cnt,
                                video_share_cnt,
                                video_duration,
                                str(video_width) + "*" + str(video_height),
-                               time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
+                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
                                user_name,
                                profile_id,
                                profile_mid,
@@ -415,10 +423,29 @@ class Person:
                                cover_url,
                                video_url]]
                     time.sleep(1)
-                    Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)
+                    Feishu.update_values(log_type, "xiaoniangao", "yatRv2", "F2:W2", values)
+
+                    # 保存视频信息到监控表
+                    Common.logger(log_type).info("添加视频到监控表:{}", video_title)
+                    # 插入空行
+                    time.sleep(1)
+                    Feishu.insert_columns(log_type, "monitor", "N7e2yI", "ROWS", 1, 2)
+                    # 视频信息写入监控表
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
+                               video_id,
+                               video_title,
+                               our_video_link,
+                               profile_id,
+                               profile_mid,
+                               user_name,
+                               video_duration,
+                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
+                               video_play_cnt]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, "monitor", "N7e2yI", "F2:O2", values)
                 return int(video_send_time)
         except Exception as e:
-            Common.person_logger().error("请求关注列表异常:{}", e)
+            Common.logger(log_type).error("下载/上传视频异常:{}", e)
 
 
 if __name__ == "__main__":
@@ -427,4 +454,9 @@ if __name__ == "__main__":
     # person.download_person_videos()
     # person.sub_persons()
     # print(person.unfollow_person_list())
-    person.unsub_persons()
+    # person.unsub_persons()
+    print(person.person_uid)
+    print(person.person_token)
+    print(person.person_referer)
+    print(person.person_x_token_id)
+    print(person.person_x_b3_traceid)

+ 48 - 35
main/publish.py

@@ -17,7 +17,7 @@ proxies = {"http": None, "https": None}
 
 class Publish:
     @classmethod
-    def publish_video_dev(cls, request_data):
+    def publish_video_dev(cls, log_type, request_data):
         """
         loginUid  站内uid (随机)
         appType  默认:888888
@@ -35,14 +35,17 @@ class Publish:
         """
         # Common.logger().info('publish request data: {}'.format(request_data))
         result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
-        # Common.logger().info('publish result: {}'.format(result))
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
         if result['code'] != 0:
-            Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
+            Common.logger(log_type).error('pushlish failure msg = {}'.format(result['msg']))
         else:
-            Common.logger().info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+            Common.logger(log_type).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+        return video_id
 
     @classmethod
-    def publish_video_prod(cls, request_data):
+    def publish_video_prod(cls, log_type, request_data):
         """
         loginUid  站内uid (随机)
         appType  默认:888888
@@ -59,11 +62,14 @@ class Publish:
         :return:
         """
         result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
-        # Common.logger().info('publish result: {}'.format(result))
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
         if result['code'] != 0:
-            Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
+            Common.logger(log_type).error('pushlish failure msg = {}'.format(result['msg']))
         else:
-            Common.logger().info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+            Common.logger(log_type).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+        return video_id
 
     @classmethod
     def request_post(cls, request_url, request_data):
@@ -115,21 +121,21 @@ class Publish:
     oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
 
     @classmethod
-    def put_file(cls, oss_file, local_file):
+    def put_file(cls, log_type, oss_file, local_file):
         cls.bucket.put_object_from_file(oss_file, local_file)
-        Common.logger().info("put oss file = {}, local file = {} success".format(oss_file, local_file))
+        Common.logger(log_type).info("put oss file = {}, local file = {} success".format(oss_file, local_file))
 
     # 清除本地文件
     @classmethod
-    def remove_local_file(cls, local_file):
+    def remove_local_file(cls, log_type, local_file):
         os.remove(local_file)
-        Common.logger().info("remove local file = {} success".format(local_file))
+        Common.logger(log_type).info("remove local file = {} success".format(local_file))
 
     # 清除本地文件夹
     @classmethod
-    def remove_local_file_dir(cls, local_file):
+    def remove_local_file_dir(cls, log_type, local_file):
         os.rmdir(local_file)
-        Common.logger().info("remove local file dir = {} success".format(local_file))
+        Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
 
     local_file_path = './videos'
     video_file = 'video'
@@ -142,13 +148,14 @@ class Publish:
     uids_prod_play = [20631196, 20631197, 20631198, 20631199, 20631200, 20631201]
 
     @classmethod
-    def upload_and_publish(cls, env, job):
+    def upload_and_publish(cls, log_type, env, job):
         """
         上传视频到 oss
+        :param log_type: 选择的 log
         :param env: 测试环境:dev,正式环境:prod
         :param job: 上升榜:up,播放量:play
         """
-        Common.logger().info("upload_and_publish starting...")
+        Common.logger(log_type).info("upload_and_publish starting...")
         today = time.strftime("%Y%m%d", time.localtime())
         # videos 目录下的所有视频文件夹
         files = os.listdir(cls.local_file_path)
@@ -158,10 +165,13 @@ class Publish:
                 fi_d = os.path.join(cls.local_file_path, f)
                 # 确认为视频文件夹
                 if os.path.isdir(fi_d):
-                    Common.logger().info('dir = {}'.format(fi_d))
+                    Common.logger(log_type).info('dir = {}'.format(fi_d))
                     # 列出所有视频文件夹
                     dir_files = os.listdir(fi_d)
-                    data = {'appType': '888888', 'crawlerSrcCode': 'XIAONIANGAO_XCX', 'viewStatus': '1', 'versionCode': '1'}
+                    data = {'appType': '888888',
+                            'crawlerSrcCode': 'XIAONIANGAO_XCX',
+                            'viewStatus': '1',
+                            'versionCode': '1'}
                     now_timestamp = int(round(time.time() * 1000))
                     data['crawlerTaskTimestamp'] = str(now_timestamp)
                     global uid
@@ -178,7 +188,7 @@ class Publish:
                     for fi in dir_files:
                         # 视频文件夹下的所有文件路径
                         fi_path = fi_d + '/' + fi
-                        Common.logger().info('dir fi_path = {}'.format(fi_path))
+                        Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # 读取 info.txt,赋值给 data
                         if cls.info_file in fi:
                             f = open(fi_path, "r", encoding="UTF-8")
@@ -187,7 +197,7 @@ class Publish:
                                 line = f.readline()
                                 line = line.replace('\n', '')
                                 if line is not None and len(line) != 0 and not line.isspace():
-                                    Common.logger().info("line = {}".format(line))
+                                    # Common.logger(log_type).info("line = {}".format(line))
                                     if i == 0:
                                         data['crawlerSrcId'] = line
                                     elif i == 1:
@@ -197,15 +207,15 @@ class Publish:
                                     elif i == 8:
                                         data['crawlerSrcPublishTimestamp'] = line
                                 else:
-                                    Common.logger().warning("{} line is None".format(fi_path))
+                                    Common.logger(log_type).warning("{} line is None".format(fi_path))
                             f.close()
                             # remove info.txt
-                            cls.remove_local_file(fi_path)
+                            cls.remove_local_file(log_type, fi_path)
                     # 刷新数据
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
                         fi_path = fi_d + '/' + fi
-                        Common.logger().info('dir fi_path = {}'.format(fi_path))
+                        # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # 上传oss
                         if cls.video_file in fi:
                             global oss_video_file
@@ -213,31 +223,34 @@ class Publish:
                                 oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
                             elif env == "prod":
                                 oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
-                            Common.logger().info("oss_video_file = {}".format(oss_video_file))
-                            cls.put_file(oss_video_file, fi_path)
+                            Common.logger(log_type).info("oss_video_file = {}".format(oss_video_file))
+                            cls.put_file(log_type, oss_video_file, fi_path)
                             data['videoPath'] = oss_video_file
-                            Common.logger().info("videoPath = {}".format(oss_video_file))
+                            Common.logger(log_type).info("videoPath = {}".format(oss_video_file))
                         elif cls.image_file in fi:
                             global oss_image_file
                             if env == "dev":
                                 oss_image_file = cls.oss_file_path_image.format("env", today, data['crawlerSrcId'])
                             elif env == "prod":
                                 oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
-                            Common.logger().info("oss_image_file = {}".format(oss_image_file))
-                            cls.put_file(oss_image_file, fi_path)
+                            Common.logger(log_type).info("oss_image_file = {}".format(oss_image_file))
+                            cls.put_file(log_type, oss_image_file, fi_path)
                             data['coverImgPath'] = oss_image_file
-                            Common.logger().info("coverImgPath = {}".format(oss_image_file))
+                            Common.logger(log_type).info("coverImgPath = {}".format(oss_image_file))
                         # 全部remove
-                        cls.remove_local_file(fi_path)
+                        cls.remove_local_file(log_type, fi_path)
 
                     # 发布
                     if env == "dev":
-                        cls.publish_video_dev(data)
+                        video_id = cls.publish_video_dev(log_type, data)
                     elif env == "prod":
-                        cls.publish_video_prod(data)
-                    cls.remove_local_file_dir(fi_d)
+                        video_id = cls.publish_video_prod(log_type, data)
+                    else:
+                        video_id = cls.publish_video_dev(log_type, data)
+                    cls.remove_local_file_dir(log_type, fi_d)
+                    return video_id
 
                 else:
-                    Common.logger().error('file not a dir = {}'.format(fi_d))
+                    Common.logger(log_type).error('file not a dir = {}'.format(fi_d))
             except Exception as e:
-                Common.logger().exception('upload_and_publish error', e)
+                Common.logger(log_type).exception('upload_and_publish error', e)

+ 16 - 16
main/run_hour_list.py

@@ -18,35 +18,36 @@ def hour_list_job():
         yesterday = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
         # 前天
         before_yesterday = (datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d")
+        Common.del_logs("hour")
 
-        Common.logger().info("检查今日上升榜日期是否存在")
-        HourList.check_hour_list_data(today)
+        Common.logger("hour").info("检查今日上升榜日期是否存在")
+        HourList.check_hour_list_data("hour", today)
 
         while True:
             # 获取符合规则的视频,写入列表
-            HourList.get_hour_list_feeds()
+            HourList.get_hour_list_feeds("hour")
             time.sleep(1)
             hour_list_job_time = datetime.datetime.now()
             if hour_list_job_time.hour == 10 and 0 <= hour_list_job_time.minute <= 10:
 
-                Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data(today, yesterday, before_yesterday)
-                Common.logger().info("开始下载上升榜")
-                HourList.download_and_publish()
+                Common.logger("hour").info("开始更新上升榜")
+                HourList.update_hour_list_data("hour", today, yesterday, before_yesterday)
+                Common.logger("hour").info("开始下载上升榜")
+                HourList.download_and_publish("hour")
 
             elif hour_list_job_time.hour == 15 and hour_list_job_time.minute <= 10:
 
-                Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data(today, yesterday, before_yesterday)
-                Common.logger().info("开始下载上升榜")
-                HourList.download_and_publish()
+                Common.logger("hour").info("开始更新上升榜")
+                HourList.update_hour_list_data("hour", today, yesterday, before_yesterday)
+                Common.logger("hour").info("开始下载上升榜")
+                HourList.download_and_publish("hour")
 
             elif hour_list_job_time.hour == 20 and hour_list_job_time.minute <= 10:
 
-                Common.logger().info("开始更新上升榜")
-                HourList.update_hour_list_data(today, yesterday, before_yesterday)
-                Common.logger().info("开始下载上升榜")
-                HourList.download_and_publish()
+                Common.logger("hour").info("开始更新上升榜")
+                HourList.update_hour_list_data("hour", today, yesterday, before_yesterday)
+                Common.logger("hour").info("开始下载上升榜")
+                HourList.download_and_publish("hour")
 
             elif hour_list_job_time.hour == 23 and hour_list_job_time.minute >= 50:
 
@@ -55,4 +56,3 @@ def hour_list_job():
 
 if __name__ == "__main__":
     hour_list_job()
-    # HourList.download_and_publish()

+ 8 - 7
main/run_person_list.py

@@ -15,12 +15,13 @@ from main.person_list import Person
 
 def person_list_job():
     while True:
-        Common.person_logger().info("开始抓取小年糕关注榜")
+        Common.del_logs("person")
+        Common.logger("person").info("开始抓取小年糕关注榜")
         # 关注用户列表
-        Person.sub_persons()
+        Person.sub_persons("person")
         time.sleep(1)
         # 取消关注用户
-        Person.unsub_persons()
+        Person.unsub_persons("person")
         time.sleep(1)
 
         while True:
@@ -33,14 +34,14 @@ def person_list_job():
             endtime = before_yesterday
             person_list_time = datetime.datetime.now()
 
-            if Person.download_from_sub(endtime) >= endtime:
-                Person.download_from_sub(endtime)
+            if Person.download_from_sub("person", endtime) >= endtime:
+                Person.download_from_sub("person", endtime)
             elif person_list_time.hour == 23 and person_list_time.minute >= 50:
-                Common.person_logger().info("结束今日抓取任务")
+                Common.logger("person").info("结束今日抓取任务")
                 Person.next_t_list = [-1]
                 break
             else:
-                Common.person_logger().info("发布时间大于48小时,结束抓取任务")
+                Common.logger("person").info("发布时间大于48小时,结束抓取任务")
                 time.sleep(3)
                 Person.next_t_list = [-1]
                 break

+ 0 - 3
person-logs/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/5/18

+ 0 - 4
requirements.txt

@@ -1,4 +0,0 @@
-loguru==0.6.0
-oss2==2.15.0
-requests==2.27.1
-urllib3==1.26.9

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است