Browse Source

变更表结构,代码优化

wangkun 3 năm trước cách đây
mục cha
commit
ef2b2a547d

+ 56 - 1
README.md

@@ -1 +1,56 @@
-看一看+小程序爬虫
+看一看+小程序爬虫:推荐页
+
+python==3.10.0
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9
+
+执行入口:
+cd ./crawler-kanyikan-Windows
+python3 main/run.py
+
+==========2022/4/21===========
+- 视频发布7日内,播放量大于1万(当前时间 - 发布时间 <= 7 天)
+- 任务执行规则:
+    1.凌晨0点-10点      7日内播放大于1万 爬取
+    2.早上10点-20点     内容上升榜 爬取
+    3.晚上20点-24点     15万播放爬取
+
+
+==========2022/4/15===========
+- 视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
+- 视频时长1分钟以上,10分钟以下
+- 分辨率 宽或高大于720
+- 分享量>0
+- 站内标题=看一看视频原标题
+- 站内封面图=看一看视频原封面图
+- 任务执行规则:
+    1.凌晨0点-10点      3日内播放大于2万 爬取
+    2.早上10点-20点     内容上升榜 爬取
+    3.晚上20点-24点     15万播放爬取
+
+
+==========2022/3/29===========
+1.凌晨5:00 - 21:00,跑上升榜爬虫(循环隔 1 小时,检查播放量>=1000)
+2.晚上21:00 - 5:00,跑播放量爬虫(播放量>=200000)
+3.视频下载后,立即上传
+
+
+==========2022/3/24===========
+1.周一至周五,跑上升榜爬虫(循环隔 1 小时,检查播放量>=1000)
+2.周六至周日,跑播放量爬虫(播放量>=200000)
+3.视频下载后,立即上传
+4.下周一时,暂停播放量爬虫,恢复上升榜爬虫
+
+
+==========2022/3/15===========
+1.拿到外网视频 list
+2.去重
+3.获取当前抓取时间、以及播放量、加上基本规则,存储本地:
+	3.1 分辨率,宽或者高 >= 720 或 无分辨率
+	3.2 600 >= 时长 >= 60
+4.循环隔 1 小时,检查播放量 >=1000(当前播放量 - 1 小时前的播放量),开始抓取。同时从本地存储中删除,加入到去重文本中。
+5.下载总条数,先不限制
+6.下载时间:早上 8 点 - 晚上 21 点截止
+7.下载完成后立即上传

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 0 - 0
chlsfiles/charles202205071009.chlsj


Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 0 - 0
chlsfiles/charles202206131711.txt


+ 0 - 63
main/common.py

@@ -8,7 +8,6 @@
 import json
 from datetime import date, timedelta
 import datetime
-import logging
 import os
 import time
 import requests
@@ -28,29 +27,6 @@ class Common:
     # 明天 <class 'str'>  2022-04-15
     tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
 
-    # 使用 logging 模块生成日志
-    @staticmethod
-    def crawler_log():
-        """
-        生成 log 日志
-        """
-        # 日志路径
-        log_dir = r"./logs/"
-        log_path = os.getcwd() + os.sep + log_dir
-        if not os.path.isdir(log_path):
-            os.makedirs(log_path)
-
-        # 日志参数
-        log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-        date_format = "%Y-%m-%d %p %H:%M:%S"
-        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
-
-        # 日志初始化
-        logging.basicConfig(filename=log_path + log_name, level=logging.INFO, format=log_format, datefmt=date_format)
-        crawler_logger = logging.getLogger("crawler-log")
-
-        return crawler_logger
-
     # 使用 logger 模块生成日志
     @staticmethod
     def logger():
@@ -246,45 +222,6 @@ class Common:
                 time.sleep(30)
                 cls.get_session()
 
-    @classmethod
-    def kanyikan_download_count(cls):
-        videoid_path = r"./txt/kanyikan_videoid.txt"
-        count = 0
-        for count, line in enumerate(open(videoid_path, "rb").readlines()):
-            count += 1
-        cls.logger().info('累计下载视频数: {}\n', count)
-
-    @classmethod
-    def kanyikan_today_download_count(cls):
-        """
-        统计快手渠道当日下载视频数
-        :return:
-        """
-        # 创建空文件
-        with open(r"./txt/" + str(cls.today) + "_kanyikan_videoid.txt", "a") as f:
-            f.write("")
-        videoid_path = r"./txt/" + str(cls.today) + "_kanyikan_videoid.txt"
-        count = 0
-        for count, line in enumerate(open(videoid_path, "rb").readlines()):
-            count += 1
-        return count
-
-    @classmethod
-    def del_yesterday_kanyikan_videoid_txt(cls):
-        """
-        删除快手渠道昨日下载视频数的 txt 文件
-        :return:
-        """
-        yesterday_kanyikan_videoid_txt_dir = r"./txt/"
-        all_files = sorted(os.listdir(yesterday_kanyikan_videoid_txt_dir))
-        for file in all_files:
-            name = os.path.splitext(file)[0]
-            if name == cls.yesterday + "_kanyikan_videoid":
-                os.remove(yesterday_kanyikan_videoid_txt_dir + file)
-        Common.logger().info("删除快手昨天下载统计文件成功")
-
 
 if __name__ == "__main__":
     common = Common()
-    common.del_yesterday_kanyikan_videoid_txt()
-    print(common.kanyikan_today_download_count())

+ 41 - 0
main/demo.py

@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/6/13
+import json
+import time
+
+import requests
+
+
+class Demo:
+
+    @classmethod
+    def get_video_info(cls):
+        url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
+        param = {
+            "session": "MSfFEF48cx1fSLhB3c2uEinhfSzecBHMDPVKmpcciDtwmG92mzZn4tZ5oLQf9RtfLLzxW3Ns4uodIvEAFlx28hPTGjdCxSVbfJ4kDyscORaqxapOp45k8BcinewqxyR-pwRVtP0k-AjrO2AhpziB3qp7o1TcTILcR6Dq5s5TmC6PzY3OuaPBaVAMyT1HnJO0R7HN62UkueNwzkqAbCr1-AIX-mpe0QJ-IIvV7jeoMRNGeqwMVR1vHTMEGdSo1OFv94Ga4w-gQFABhL3r6Ovly9VHpFX1Vv1JGRwP05R6u3eX6G-9VoSdCQ_tb92z9uT9RZXsGfet2HRTtGPYi7XIPRGa0pv1-59H6ZVBfa7q2lR_NJBBbfgEovXiCfUDjRSRsCZs8FRDrZOZTRKlVxKCzOI3YCI2y2SEelfVqo97clTTAxXDqXG1MximDxh3gw_k",
+            "vid": "ugc_bukozhr",
+            "wxaVersion": "3.9.2",
+            "channelid": "208201",
+            "scene": "32",
+            "subscene": "1089",
+            "model": "iPhone 11<iPhone12,1>14.7.1",
+            "clientVersion": "8.0.18",
+            "sharesearchid": "447665862521758270",
+            "sharesource": "-1"
+        }
+        r = requests.get(url=url, params=param)
+        response = json.loads(r.content.decode("utf8"))
+        print(response["data"]["openid"])
+
+    @classmethod
+    def strtime_to_int(cls):
+        download_time = "2022/06/13 17:21:01"
+        download_time = int(time.mktime(time.strptime(download_time, "%Y/%m/%d %H:%M:%S")))
+        print(download_time)
+
+
+if __name__ == "__main__":
+    demo = Demo()
+    # demo.get_video_info()
+    demo.strtime_to_int()

+ 68 - 48
main/download_play.py

@@ -47,31 +47,56 @@ class DownloadPlay:
             if len(Feishu.get_values_batch("SdCHOM")) == 1:
                 pass
             else:
-                for i in range(len(Feishu.get_values_batch("SdCHOM"))):
+                for i in range(1, len(Feishu.get_values_batch("SdCHOM"))+1):
                     time.sleep(1)
                     try:
-                        download_video_id = Feishu.get_values_batch("SdCHOM")[i+1][1]
-                        download_video_play_cnt = Feishu.get_values_batch("SdCHOM")[i+1][2]
-                        download_video_title = Feishu.get_values_batch("SdCHOM")[i+1][3]
-                        download_video_duration = Feishu.get_values_batch("SdCHOM")[i+1][4]
-                        download_video_comment_cnt = Feishu.get_values_batch("SdCHOM")[i+1][5]
-                        download_video_like_cnt = Feishu.get_values_batch("SdCHOM")[i+1][6]
-                        download_video_share_cnt = Feishu.get_values_batch("SdCHOM")[i+1][7]
-                        download_video_resolution = Feishu.get_values_batch("SdCHOM")[i+1][8]
+                        # download_push_time = Feishu.get_values_batch("SdCHOM")[i][0]
+                        download_video_id = Feishu.get_values_batch("SdCHOM")[i][2]
+                        download_video_title = Feishu.get_values_batch("SdCHOM")[i][3]
+                        download_video_play_cnt = Feishu.get_values_batch("SdCHOM")[i][4]
+                        download_video_comment_cnt = Feishu.get_values_batch("SdCHOM")[i][5]
+                        download_video_like_cnt = Feishu.get_values_batch("SdCHOM")[i][6]
+                        download_video_share_cnt = Feishu.get_values_batch("SdCHOM")[i][7]
+                        download_video_duration = Feishu.get_values_batch("SdCHOM")[i][8]
+                        download_video_resolution = Feishu.get_values_batch("SdCHOM")[i][9]
                         download_video_width = download_video_resolution.split("*")[0]
                         download_video_height = download_video_resolution.split("*")[-1]
-                        download_video_send_time = Feishu.get_values_batch("SdCHOM")[i+1][9]
-                        download_user_name = Feishu.get_values_batch("SdCHOM")[i+1][10]
-                        download_head_url = Feishu.get_values_batch("SdCHOM")[i+1][11]
-                        download_cover_url = Feishu.get_values_batch("SdCHOM")[i+1][12]
-                        download_video_url = Feishu.get_values_batch("SdCHOM")[i+1][13]
-                        download_video_session = Feishu.get_values_batch("SdCHOM")[i+1][14]
+                        download_video_send_time = Feishu.get_values_batch("SdCHOM")[i][10]
+                        download_user_name = Feishu.get_values_batch("SdCHOM")[i][11]
+                        download_user_id = Feishu.get_values_batch("SdCHOM")[i][12]
+                        download_head_url = Feishu.get_values_batch("SdCHOM")[i][13][0]["link"]
+                        download_cover_url = Feishu.get_values_batch("SdCHOM")[i][14][0]["link"]
+                        download_video_url = Feishu.get_values_batch("SdCHOM")[i][15][0]["link"]
 
-                        if download_video_id not in [j for i in Feishu.get_values_batch("20ce0c") for j in i]\
-                                and cls.play_rule(download_video_width, download_video_height,
-                                                  download_video_duration, download_video_play_cnt) is True:
-                            Common.logger().info("开始下载视频:{}", download_video_title)
+                        # Common.logger().info("download_video_id:{}", download_video_id)
+                        # Common.logger().info("download_video_title:{}", download_video_title)
+                        # Common.logger().info("download_video_play_cnt:{}", download_video_play_cnt)
+                        # Common.logger().info("download_video_comment_cnt:{}", download_video_comment_cnt)
+                        # Common.logger().info("download_video_like_cnt:{}", download_video_like_cnt)
+                        # Common.logger().info("download_video_share_cnt:{}", download_video_share_cnt)
+                        # Common.logger().info("download_video_duration:{}", download_video_duration)
+                        # Common.logger().info("download_video_resolution:{}", download_video_resolution)
+                        # Common.logger().info("download_video_send_time:{}", download_video_send_time)
+                        # Common.logger().info("download_user_name:{}", download_user_name)
+                        # Common.logger().info("download_user_id:{}", download_user_id)
+                        # Common.logger().info("download_head_url:{}", download_head_url)
+                        # Common.logger().info("download_cover_url:{}", download_cover_url)
+                        # Common.logger().info("download_video_url:{}", download_video_url)
+
+                        Common.logger().info("正在判断第{}行,视频:{}", i, download_video_title)
 
+                        if cls.play_rule(
+                                play_width=download_video_width, play_height=download_video_height,
+                                play_duration=download_video_duration, play_play_cnt=download_video_play_cnt) is False:
+                            Common.logger().info("不满足播放量下载规则,删除该视频信息:{}", download_video_title)
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                            Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        elif download_video_id in [j for m in Feishu.get_values_batch("20ce0c") for j in m]:
+                            Common.logger().info("视频已下载,删除该视频信息:{}", download_video_title)
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                            Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        else:
+                            Common.logger().info("开始下载视频:{}", download_video_title)
                             # 下载封面
                             Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
                             # 下载视频
@@ -87,12 +112,13 @@ class DownloadPlay:
                                           str(download_video_like_cnt) + "\n" +
                                           str(download_video_share_cnt) + "\n" +
                                           str(download_video_resolution) + "\n" +
-                                          str(download_video_send_time) + "\n" +
+                                          str(int(time.mktime(
+                                              time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
                                           str(download_user_name) + "\n" +
                                           str(download_head_url) + "\n" +
                                           str(download_video_url) + "\n" +
                                           str(download_cover_url) + "\n" +
-                                          str(download_video_session))
+                                          str(Common.get_session()))
                             Common.logger().info("==========视频信息已保存至info.txt==========")
 
                             # 上传视频
@@ -102,43 +128,37 @@ class DownloadPlay:
                             # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
                             Common.logger().info("保存视频ID至云文档:{}", download_video_title)
                             # 看一看+ ,视频ID工作表,插入首行
-                            Feishu.insert_columns("20ce0c")
+                            Feishu.insert_columns("20ce0c", "ROWS", 1, 2)
                             # 看一看+ ,视频ID工作表,首行写入数据
                             upload_time = int(time.time())
-                            Feishu.update_values("20ce0c",
-                                                 str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time))),
-                                                 str(download_video_id),
-                                                 str(download_video_play_cnt),
-                                                 str(download_video_title),
-                                                 str(download_video_duration),
-                                                 str(download_video_comment_cnt),
-                                                 str(download_video_like_cnt),
-                                                 str(download_video_share_cnt),
-                                                 str(download_video_resolution),
-                                                 str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                   time.localtime(
-                                                                       int(download_video_send_time)))),
-                                                 str(download_user_name),
-                                                 str(download_head_url),
-                                                 str(download_cover_url),
-                                                 str(download_video_url),
-                                                 str(download_video_session))
+                            values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                                       "播放量榜",
+                                       str(download_video_id),
+                                       str(download_video_title),
+                                       download_video_play_cnt,
+                                       download_video_comment_cnt,
+                                       download_video_like_cnt,
+                                       download_video_share_cnt,
+                                       download_video_duration,
+                                       str(download_video_resolution),
+                                       str(download_video_send_time),
+                                       str(download_user_name),
+                                       str(download_user_id),
+                                       str(download_head_url),
+                                       str(download_cover_url),
+                                       str(download_video_url)]]
+                            time.sleep(1)
+                            Feishu.update_values("20ce0c", "A2:Q2", values)
 
                             # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                             Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
                             # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-
-                        else:
-                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                            Common.logger().info("该视频不满足下载规则,删除在云文档中的信息:{}", download_video_title)
-                            # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                            Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
 
                     except Exception as e:
                         Common.logger().error("视频 info 异常,删除该视频信息", e)
                         # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
 
                 cls.download_play_video("prod")
         except Exception as e:

+ 107 - 94
main/download_sendtime.py

@@ -55,13 +55,13 @@ class DownloadSendtime:
             if len(Feishu.get_values_batch("SdCHOM")) == 1:
                 pass
             else:
-                for i in range(len(Feishu.get_values_batch("SdCHOM"))):
+                for i in range(1, len(Feishu.get_values_batch("SdCHOM"))+1):
                     time.sleep(1)
                     try:
                         sendtime_session = Common.get_session()
-                        Common.logger().info("获取视频info时,session:{}", sendtime_session)
-                        download_video_id = Feishu.get_values_batch("SdCHOM")[i+1][1]
-                        download_video_title = Feishu.get_values_batch("SdCHOM")[i+1][3]
+                        # Common.logger().info("获取视频info时,session:{}", sendtime_session)
+                        download_video_id = Feishu.get_values_batch("SdCHOM")[i][2]
+                        download_video_title = Feishu.get_values_batch("SdCHOM")[i][3]
                         url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
                         param = {
                             "session": sendtime_session,
@@ -81,7 +81,7 @@ class DownloadSendtime:
                         if "data" not in response:
                             Common.logger().error("获取视频info时错误,删除该视频:{}", download_video_title)
                             # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                            Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
                         else:
                             data = response["data"]
                             v_duration = data["duration"]
@@ -94,6 +94,7 @@ class DownloadSendtime:
                             v_resolution = str(v_width) + "*" + str(v_height)
                             v_send_date = data["upload_time"]
                             v_username = data["user_info"]["nickname"].strip().replace("\n", "")
+                            v_user_id = data["openid"]
                             v_user_cover = data["user_info"]["headimg_url"]
                             v_video_cover = data["cover_url"]
                             if "items" not in data["play_info"]:
@@ -107,102 +108,114 @@ class DownloadSendtime:
                                 else:
                                     download_url_sendtime = data["play_info"]["items"][0]["play_url"]
 
-                            # 判断基本规则
-                            if download_video_id not in [j for i in Feishu.get_values_batch("20ce0c") for j in i]\
-                                    and cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is True \
-                                    and download_video_id != "" and download_video_title != "" and v_duration != "" \
-                                    and v_play_cnt_sendtime != "" and v_comment_cnt != "" and v_liked_cnt != "" \
-                                    and v_shared_cnt != "" and v_width != "" and v_height != "" \
-                                    and v_send_date != "" and v_username != "" and v_user_cover != "" \
-                                    and v_video_cover != "" and download_url_sendtime != "":
-                                # 满足下载条件:当前时间 - 发布时间 <= 3天,播放量大于1万
-                                if int(time.time()) - int(v_send_date) <= 604800:
-                                    if int(v_play_cnt_sendtime) >= 10000:
-                                        Common.logger().info("该视频:{} ,在7天内的播放量{}>=10000",
-                                                             download_video_title, v_play_cnt_sendtime)
-
-                                        # 下载封面
-                                        Common.download_method("cover", download_video_title, v_video_cover)
-                                        # 下载视频
-                                        Common.download_method("video", download_video_title, download_url_sendtime)
-                                        # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                        with open(r"./videos/" + download_video_title +
-                                                  "/" + "info.txt", "a", encoding="utf8") as f_a2:
-                                            f_a2.write(str(download_video_id) + "\n" +
-                                                       str(download_video_title) + "\n" +
-                                                       str(v_duration) + "\n" +
-                                                       str(v_play_cnt_sendtime) + "\n" +
-                                                       str(v_comment_cnt) + "\n" +
-                                                       str(v_liked_cnt) + "\n" +
-                                                       str(v_shared_cnt) + "\n" +
-                                                       str(v_resolution) + "\n" +
-                                                       str(v_send_date) + "\n" +
-                                                       str(v_username) + "\n" +
-                                                       str(v_user_cover) + "\n" +
-                                                       str(download_url_sendtime) + "\n" +
-                                                       str(v_video_cover) + "\n" +
-                                                       str(sendtime_session))
-                                        Common.logger().info("==========视频信息已保存至info.txt==========")
-
-                                        # 上传该视频
-                                        Common.logger().info("开始上传视频:{}", download_video_title)
-                                        Publish.upload_and_publish(env, "send_time")
-
-                                        # 保存视频 ID 到云文档:
-                                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                                        Common.logger().info("保存视频ID至云文档:{}", download_video_title)
-                                        # 看一看+ ,视频ID工作表,插入首行
-                                        Feishu.insert_columns("20ce0c")
-                                        # 看一看+ ,视频ID工作表,首行写入数据
-                                        upload_time = int(time.time())
-                                        Feishu.update_values("20ce0c",
-                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                               time.localtime(upload_time))),
-                                                             str(download_video_id),
-                                                             str(v_play_cnt_sendtime),
-                                                             str(download_video_title),
-                                                             str(v_duration),
-                                                             str(v_comment_cnt),
-                                                             str(v_liked_cnt),
-                                                             str(v_shared_cnt),
-                                                             str(v_resolution),
-                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                               time.localtime(int(v_send_date)))),
-                                                             str(v_username),
-                                                             str(v_user_cover),
-                                                             str(v_video_cover),
-                                                             str(download_url_sendtime),
-                                                             str(sendtime_session))
-
-                                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                        Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
-                                        # 删除行或列,可选 ROWS、COLUMNS
-                                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-
-                                    else:
-                                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                        Common.logger().info("该视频7天播放量:{}<10000 ;不满足下载规则:{}",
-                                                             int(v_play_cnt_sendtime), download_video_title)
-                                        # 删除行或列,可选 ROWS、COLUMNS
-                                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-                                else:
-                                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                    Common.logger().info("视频发布时间大于7天:{}天;标题:{}",
-                                                         int((int(time.time()) - int(v_send_date)) / 86400),
-                                                         download_video_title)
-                                    # 删除行或列,可选 ROWS、COLUMNS
-                                    Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                            Common.logger().info("正在判断第{}行,视频:{}", i, download_video_title)
+
+                            # 判断无效视频
+                            if download_video_id == "" \
+                                    or download_video_id is None\
+                                    and download_video_title == ""\
+                                    or download_video_title is None\
+                                    and v_duration == "" \
+                                    and v_play_cnt_sendtime == ""\
+                                    and v_send_date == ""\
+                                    and v_user_cover == "" \
+                                    and v_video_cover == ""\
+                                    and download_url_sendtime == "":
+                                Common.logger().info("无效视频,删除该视频信息:{}", download_video_title)
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 发布时间榜下载规则
+                            elif cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is False:
+                                Common.logger().info("不满足发布时间榜下载规则,删除该视频信息:{}", download_video_title)
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 发布时间 <=7 天
+                            elif int(time.time()) - int(v_send_date) > 604800:
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Common.logger().info("视频发布时间大于7天:{}天;标题:{}",
+                                                     int((int(time.time()) - int(v_send_date)) / 86400),
+                                                     download_video_title)
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            elif int(v_play_cnt_sendtime) < 10000:
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Common.logger().info("该视频7天播放量:{}<10000 ;不满足下载规则:{}",
+                                                     int(v_play_cnt_sendtime), download_video_title)
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            elif download_video_id in [j for m in Feishu.get_values_batch("20ce0c") for j in m]:
+                                Common.logger().info("视频已下载,删除该视频信息:{}", download_video_title)
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
                             else:
+                                Common.logger().info("开始下载视频:{}", download_video_title)
+
+                                # 下载封面
+                                Common.download_method("cover", download_video_title, v_video_cover)
+                                # 下载视频
+                                Common.download_method("video", download_video_title, download_url_sendtime)
+                                # 保存视频信息到 "./videos/{视频标题}/info.txt"
+                                with open(r"./videos/" + download_video_title +
+                                          "/" + "info.txt", "a", encoding="utf8") as f_a2:
+                                    f_a2.write(str(download_video_id) + "\n" +
+                                               str(download_video_title) + "\n" +
+                                               str(v_duration) + "\n" +
+                                               str(v_play_cnt_sendtime) + "\n" +
+                                               str(v_comment_cnt) + "\n" +
+                                               str(v_liked_cnt) + "\n" +
+                                               str(v_shared_cnt) + "\n" +
+                                               str(v_resolution) + "\n" +
+                                               str(v_send_date) + "\n" +
+                                               str(v_username) + "\n" +
+                                               str(v_user_cover) + "\n" +
+                                               str(download_url_sendtime) + "\n" +
+                                               str(v_video_cover) + "\n" +
+                                               str(sendtime_session))
+                                Common.logger().info("==========视频信息已保存至info.txt==========")
+
+                                # 上传该视频
+                                Common.logger().info("开始上传视频:{}", download_video_title)
+                                Publish.upload_and_publish(env, "send_time")
+
+                                # 保存视频 ID 到云文档:
+                                # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                                Common.logger().info("保存视频ID至云文档:{}", download_video_title)
+                                # 看一看+ ,视频ID工作表,插入首行
+                                Feishu.insert_columns("20ce0c", "rows", 1, 2)
+                                # 看一看+ ,视频ID工作表,首行写入数据
+                                upload_time = int(time.time())
+                                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                                           "发布时间榜",
+                                           str(download_video_id),
+                                           str(download_video_title),
+                                           v_play_cnt_sendtime,
+                                           v_comment_cnt,
+                                           v_liked_cnt,
+                                           v_shared_cnt,
+                                           v_duration,
+                                           v_resolution,
+                                           time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
+                                           str(v_username),
+                                           str(v_user_id),
+                                           str(v_user_cover),
+                                           str(v_video_cover),
+                                           str(download_url_sendtime)]]
+                                time.sleep(1)
+                                Feishu.update_values("20ce0c", "A2:Q2", values)
+
                                 # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                Common.logger().info("不满足下载规则:{}", download_video_title)
+                                Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
                                 # 删除行或列,可选 ROWS、COLUMNS
-                                Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
 
                     except Exception as e:
                         Common.logger().error("获取视频info异常:{},删除该视频", e)
                         # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
                 cls.download_sendtime_video("prod")
         except Exception as e:
             Common.logger().error(e)

+ 149 - 124
main/download_up.py

@@ -60,15 +60,15 @@ class DownloadUp:
             if len(Feishu.get_values_batch("SdCHOM")) == 1:
                 pass
             else:
-                for i in range(len(Feishu.get_values_batch("SdCHOM"))):
+                # Row 0 is skipped (a one-row sheet is treated as empty above), so the
+                # data rows are indexes 1..len-1. Stopping at len avoids indexing one
+                # past the end, which previously raised IndexError and triggered a
+                # delete request for row i + 1, beyond the fetched data.
+                for i in range(1, len(Feishu.get_values_batch("SdCHOM"))):
                     time.sleep(1)
                     try:
                         video_info_session = Common.get_session()
-                        Common.logger().info("获取视频info时,session:{}", video_info_session)
-                        download_time = Feishu.get_values_batch("SdCHOM")[i+1][0]  # 第一次获取该视频的时间
-                        download_video_id = Feishu.get_values_batch("SdCHOM")[i+1][1]  # 外网视频 ID
-                        download_video_play_cnt = Feishu.get_values_batch("SdCHOM")[i+1][2]  # 播放量
-                        download_video_title = Feishu.get_values_batch("SdCHOM")[i+1][3]
+                        # Fetch the worksheet once per iteration; every lookup below reads
+                        # the same row, so repeating the request per column only adds latency
+                        # and risks mixing values from different snapshots.
+                        row = Feishu.get_values_batch("SdCHOM")[i]
+                        # Column 0: time the video was first recorded, stored as formatted text.
+                        download_time = int(time.mktime(time.strptime(row[0], "%Y/%m/%d %H:%M:%S")))
+                        download_video_id = row[2]  # external video ID
+                        download_video_title = row[3]  # video title
+                        download_video_play_cnt = row[4]  # play count at recording time
 
                         url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
                         param = {
@@ -89,7 +89,7 @@ class DownloadUp:
                         if "data" not in response:
                             Common.logger().error("获取视频info时错误,删除该视频:{}", download_video_title)
                             # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                            Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
                         else:
                             data = response["data"]
                             v_duration = data["duration"]
@@ -102,6 +102,7 @@ class DownloadUp:
                             v_resolution = str(v_width) + "*" + str(v_height)
                             v_send_date = data["upload_time"]
                             v_username = data["user_info"]["nickname"].strip().replace("\n", "")
+                            v_user_id = data["openid"]
                             v_user_cover = data["user_info"]["headimg_url"]
                             v_video_cover = data["cover_url"]
                             if "items" not in data["play_info"]:
@@ -115,130 +116,154 @@ class DownloadUp:
                                 else:
                                     download_url_up = data["play_info"]["items"][0]["play_url"]
 
-                            # 判断基本规则
-                            if download_video_id not in [j for i in Feishu.get_values_batch("20ce0c") for j in i]\
-                                    and cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
-                                    and download_video_id != "" and download_video_title != "" and v_duration != "" \
-                                    and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
-                                    and v_shared_cnt != "" and v_width != "" and v_height != "" \
-                                    and v_send_date != "" and v_username != "" and v_user_cover != "" \
-                                    and v_video_cover != "" and download_url_up != "":
-                                if int(time.time()) - int(download_time) < 3600:
-                                    Common.logger().info("距上次获取该视频时间:{}分钟;{}",
-                                                         int((int(int(time.time()) - int(download_time))) / 60),
-                                                         download_video_title)
-                                elif 7200 >= int(time.time()) - int(download_time) >= 3600:
-                                    if int(v_play_cnt_up) - int(download_video_play_cnt) >= 1000:
-                                        Common.logger().info("该视频:{}在1小时内的播放量{}>=1000",
-                                                             download_video_title,
-                                                             int(v_play_cnt_up) - int(download_video_play_cnt))
-
-                                        # 下载封面
-                                        Common.download_method("cover", download_video_title, v_video_cover)
-                                        # 下载视频
-                                        Common.download_method("video", download_video_title, download_url_up)
-                                        # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                        with open(r"./videos/" + download_video_title
-                                                  + "/" + "info.txt", "a", encoding="utf8") as f_a2:
-                                            f_a2.write(str(download_video_id) + "\n" +
-                                                       str(download_video_title) + "\n" +
-                                                       str(v_duration) + "\n" +
-                                                       str(v_play_cnt_up) + "\n" +
-                                                       str(v_comment_cnt) + "\n" +
-                                                       str(v_liked_cnt) + "\n" +
-                                                       str(v_shared_cnt) + "\n" +
-                                                       str(v_resolution) + "\n" +
-                                                       str(v_send_date) + "\n" +
-                                                       str(v_username) + "\n" +
-                                                       str(v_user_cover) + "\n" +
-                                                       str(download_url_up) + "\n" +
-                                                       str(v_video_cover) + "\n" +
-                                                       str(video_info_session))
-                                        Common.logger().info("==========视频信息已保存至info.txt==========")
-
-                                        # 上传该视频
-                                        Common.logger().info("开始上传视频:{}", download_video_title)
-                                        Publish.upload_and_publish(env, "up")
-
-                                        # 保存视频 ID 到云文档:
-                                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                                        Common.logger().info("保存视频ID至云文档:{}", download_video_title)
-                                        # 看一看+ ,视频ID工作表,插入首行
-                                        Feishu.insert_columns("20ce0c")
-                                        # 看一看+ ,视频ID工作表,首行写入数据
-                                        upload_time = int(time.time())
-                                        Feishu.update_values("20ce0c",
-                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                               time.localtime(upload_time))),
-                                                             str(download_video_id),
-                                                             str(v_play_cnt_up),
-                                                             str(download_video_title),
-                                                             str(v_duration),
-                                                             str(v_comment_cnt),
-                                                             str(v_liked_cnt),
-                                                             str(v_shared_cnt),
-                                                             str(v_resolution),
-                                                             str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                               time.localtime(int(v_send_date)))),
-                                                             str(v_username),
-                                                             str(v_user_cover),
-                                                             str(v_video_cover),
-                                                             str(download_url_up),
-                                                             str(video_info_session))
-
-                                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                        Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
-                                        # 删除行或列,可选 ROWS、COLUMNS
-                                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-                                    else:
-                                        # 删除之前保存的该视频信息,并把现在的信息保存进去
-                                        Common.logger().info("该视频1小时内的播放量:{}<1000;更新该视频信息:{}",
-                                                             int(v_play_cnt_up) - int(download_video_play_cnt),
-                                                             download_video_title)
-                                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                                        Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
-                                        # 删除行或列,可选 ROWS、COLUMNS
-                                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-
-                                        # 看一看+工作表,插入首行
-                                        Feishu.insert_columns("SdCHOM")
-
-                                        # 获取当前时间
-                                        download_up_time = int(time.time())
-                                        # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
-                                        Feishu.update_values("SdCHOM",
-                                                             a1=str(download_up_time),
-                                                             b1=str(download_video_id),
-                                                             c1=str(v_play_cnt_up),
-                                                             d1=str(download_video_title),
-                                                             e1=str(v_duration),
-                                                             f1=str(v_comment_cnt),
-                                                             g1=str(v_liked_cnt),
-                                                             h1=str(v_shared_cnt),
-                                                             i1=str(v_resolution),
-                                                             j1=str(v_send_date),
-                                                             k1=str(v_username),
-                                                             l1=str(v_user_cover),
-                                                             m1=str(v_video_cover),
-                                                             n1=str(download_url_up),
-                                                             o1=str(video_info_session))
-                                elif int(time.time()) - int(download_time) > 7200:
-                                    Common.logger().info("距上次获取该视频时间:""{}分钟。超过2小时,删除该视频;标题:{}",
-                                                         int((int(time.time()) - int(download_time)) / 60),
-                                                         download_video_title)
-                                    # 删除行或列,可选 ROWS、COLUMNS
-                                    Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
-                            else:
-                                Common.logger().info("不满足下载规则:{}", download_video_title)
+                            Common.logger().info("正在判断第{}行,视频:{}", i, download_video_title)
+
+                            # Invalid video: ANY required field is missing (empty or None).
+                            # NOTE: a bare or/and chain parses as
+                            # `a or (b and c) or (d and e and ...)` because `and` binds
+                            # tighter than `or`, so a row with e.g. only an empty cover or
+                            # play URL would not be rejected. Test each field explicitly.
+                            required_fields = (download_video_id, download_video_title,
+                                               v_duration, v_play_cnt_up, v_send_date,
+                                               v_user_cover, v_video_cover, download_url_up)
+                            if any(field is None or field == "" for field in required_fields):
+                                Common.logger().info("无效视频,删除该视频信息:{}", download_video_title)
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 上升榜时长不足 1 小时
+                            elif int(time.time()) - int(download_time) < 3600:
+                                Common.logger().info("距上次获取该视频时间:{}分钟;{}",
+                                                     int((int(int(time.time()) - int(download_time))) / 60),
+                                                     download_video_title)
+
+                            # 上升榜时长超过 2 小时
+                            elif int(time.time()) - int(download_time) > 7200:
+                                Common.logger().info("距上次获取该视频时间:""{}分钟。超过2小时,删除该视频;标题:{}",
+                                                     int((int(time.time()) - int(download_time)) / 60),
+                                                     download_video_title)
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 上升榜下载规则
+                            elif cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is False:
+                                Common.logger().info("不满足上升榜下载规则,删除视频:{}", download_video_title)
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 从已下载视频表中去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                            elif download_video_id in [j for m in Feishu.get_values_batch("20ce0c") for j in m]:
+                                Common.logger().info("视频已下载,删除该视频信息:{}", download_video_title)
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                            # 上升榜时长在 1-2 小时内,播放量增长>=1000
+                            elif (7200 >= int(time.time()) - int(download_time) >= 3600)\
+                                    and (int(v_play_cnt_up) - int(download_video_play_cnt) >= 1000):
+                                Common.logger().info(
+                                    "视频:{},在上升榜时间内的播放量{}>=1000,开始下载视频",
+                                    download_video_title, int(v_play_cnt_up) - int(download_video_play_cnt))
+
+                                # 下载封面
+                                Common.download_method("cover", download_video_title, v_video_cover)
+                                # 下载视频
+                                Common.download_method("video", download_video_title, download_url_up)
+                                # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
+                                with open(r"./videos/" + download_video_title
+                                          + "/" + "info.txt", "a", encoding="utf8") as f_a2:
+                                    f_a2.write(str(download_video_id) + "\n" +
+                                               str(download_video_title) + "\n" +
+                                               str(v_duration) + "\n" +
+                                               str(v_play_cnt_up) + "\n" +
+                                               str(v_comment_cnt) + "\n" +
+                                               str(v_liked_cnt) + "\n" +
+                                               str(v_shared_cnt) + "\n" +
+                                               str(v_resolution) + "\n" +
+                                               str(v_send_date) + "\n" +
+                                               str(v_username) + "\n" +
+                                               str(v_user_cover) + "\n" +
+                                               str(download_url_up) + "\n" +
+                                               str(v_video_cover) + "\n" +
+                                               str(video_info_session))
+                                Common.logger().info("==========视频信息已保存至info.txt==========")
+
+                                # 上传该视频
+                                Common.logger().info("开始上传视频:{}", download_video_title)
+                                Publish.upload_and_publish(env, "up")
+
+                                # 保存视频 ID 到云文档:
+                                # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                                Common.logger().info("保存视频ID至云文档:{}", download_video_title)
+                                # 看一看+ ,视频ID工作表,插入首行
+                                Feishu.insert_columns("20ce0c", "ROWS", 1, 2)
+                                # 看一看+ ,视频ID工作表,首行写入数据
+                                upload_time = int(time.time())
+                                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                                           "上升榜",
+                                           str(download_video_id),
+                                           str(download_video_title),
+                                           v_play_cnt_up,
+                                           v_comment_cnt,
+                                           v_liked_cnt,
+                                           v_shared_cnt,
+                                           v_duration,
+                                           v_resolution,
+                                           time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
+                                           str(v_username),
+                                           str(v_user_id),
+                                           str(v_user_cover),
+                                           str(v_video_cover),
+                                           str(download_url_up)]]
+                                time.sleep(1)
+                                Feishu.update_values("20ce0c", "A2:Q2", values)
+
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                                Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                            # 上升榜时长在 1-2 小时内,播放量增长<1000
+                            elif (7200 >= int(time.time()) - int(download_time) >= 3600)\
+                                    and (int(v_play_cnt_up) - int(download_video_play_cnt) < 1000):
+                                # 删除之前保存的该视频信息,并把现在的信息保存进去
+                                Common.logger().info("该视频1小时内的播放量:{}<1000;更新该视频信息:{}",
+                                                     int(v_play_cnt_up) - int(download_video_play_cnt),
+                                                     download_video_title)
                                 # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                                 Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
                                 # 删除行或列,可选 ROWS、COLUMNS
-                                Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                                Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+
+                                # 看一看+工作表,插入首行
+                                Feishu.insert_columns("SdCHOM", "ROWS", 1, 2)
+
+                                # 获取当前时间
+                                download_up_time = int(time.time())
+                                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(download_up_time)),
+                                           "上升榜",
+                                           str(download_video_id),
+                                           str(download_video_title),
+                                           v_play_cnt_up,
+                                           v_comment_cnt,
+                                           v_liked_cnt,
+                                           v_shared_cnt,
+                                           v_duration,
+                                           v_resolution,
+                                           time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
+                                           str(v_username),
+                                           str(v_user_id),
+                                           str(v_user_cover),
+                                           str(v_video_cover),
+                                           str(download_url_up)]]
+                                time.sleep(1)
+                                Feishu.update_values("SdCHOM", "A2:Q2", values)
+
                     except Exception as e:
                         # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
                         Common.logger().error("获取视频info异常:{},删除该视频", e)
                         # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("SdCHOM", "ROWS", i + 2, i + 2)
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
         except Exception as e:
             Common.logger().error(e)
 

+ 39 - 45
main/feishu_lib.py

@@ -2,6 +2,8 @@
 # @Author: wangkun
 # @Time: 2022/5/6
 import json
+import time
+
 import requests
 import urllib3
 
@@ -30,6 +32,7 @@ class Feishu:
 
         try:
             urllib3.disable_warnings()
+            time.sleep(1)
             response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
             tenant_access_token = response.json()["tenant_access_token"]
             return tenant_access_token
@@ -82,7 +85,7 @@ class Feishu:
             # valueRenderOption=FormattedValue 计算并格式化单元格;
             # valueRenderOption=Formula单元格中含有公式时返回公式本身;
             # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
-            "valueRenderOption": "ToString",
+            "valueRenderOption": "FormattedValue",
 
             # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
             "dateTimeRenderOption": "",
@@ -92,6 +95,7 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
             response = json.loads(r.content.decode("utf8"))
             values = response["data"]["valueRanges"][0]["values"]
@@ -99,11 +103,15 @@ class Feishu:
         except Exception as e:
             Common.logger().error("读取工作表所有数据异常:{}", e)
 
-    # 看一看+工作表,插入
+    # 工作表,插入行或列
     @classmethod
-    def insert_columns(cls, sheetid):
+    def insert_columns(cls, sheetid, majordimension, startindex, endindex):
         """
-        插入行或列
+        工作表,插入行或列
+        :param sheetid: 哪张表
+        :param majordimension: 行或列,默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex: 开始的位置
+        :param endindex: 结束的位置
         :return:插入首行
         """
         url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
@@ -115,41 +123,29 @@ class Feishu:
         body = {
             "dimension": {
                 "sheetId": sheetid,
-                "majorDimension": "ROWS",  # 默认 ROWS ,可选 ROWS、COLUMNS
-                "startIndex": 1,  # 开始的位置
-                "endIndex": 2  # 结束的位置
+                "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                "startIndex": startindex,  # 开始的位置
+                "endIndex": endindex  # 结束的位置
             },
             "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("插入空行:{}", r.json()["msg"])
+            Common.logger().info("插入空行或列:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("插入空行异常:{}", e)
+            Common.logger().error("插入空行或列异常:{}", e)
 
-    # 看一看+工作表,首行写入数据
+    # 工作表,写入数据
     @classmethod
-    def update_values(cls, sheetid, a1, b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1):
+    def update_values(cls, sheetid, ranges, values):
         """
         写入数据
         :param sheetid: 哪张工作表
-        :param a1: 单元格
-        :param b1: 单元格
-        :param c1: 单元格
-        :param d1: 单元格
-        :param e1: 单元格
-        :param f1: 单元格
-        :param g1: 单元格
-        :param h1: 单元格
-        :param i1: 单元格
-        :param j1: 单元格
-        :param k1: 单元格
-        :param l1: 单元格
-        :param m1: 单元格
-        :param n1: 单元格
-        :param o1: 单元格
-        :return: 
+        :param ranges: 单元格范围
+        :param values: 更新值
+        :return:
         """
 
         url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/values_batch_update"
@@ -160,19 +156,17 @@ class Feishu:
         body = {
             "valueRanges": [
                 {
-                    "range": sheetid + "!A2:O2",
-                    "values": [
-                        [a1, b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1]
-                    ]
+                    "range": sheetid + "!" + ranges,
+                    "values": values
                 },
             ],
         }
         try:
             urllib3.disable_warnings()
             r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("空行写入视频数据:{}", r.json()["msg"])
+            Common.logger().info("写入数据:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("空行写入视频数据异常:{}", e)
+            Common.logger().error("写入数据异常:{}", e)
 
     # 读取单元格数据
     @classmethod
@@ -194,7 +188,7 @@ class Feishu:
             # valueRenderOption=FormattedValue 计算并格式化单元格;
             # valueRenderOption=Formula单元格中含有公式时返回公式本身;
             # valueRenderOption=UnformattedValue计算但不对单元格进行格式化。
-            "valueRenderOption": "ToString",
+            "valueRenderOption": "FormattedValue",
 
             # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
             "dateTimeRenderOption": "",
@@ -204,6 +198,7 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
             return r.json()["data"]["valueRange"]["values"][0]
         except Exception as e:
@@ -244,23 +239,22 @@ class Feishu:
 if __name__ == "__main__":
     feishu = Feishu()
 
-    # # 获取飞书api token
-    # feishu.get_token()
+    # Get the Feishu API token
+    # print(feishu.get_token())
+    # # Get the spreadsheet metadata
+    # feishu.get_metainfo()
 
-    # 获取表格元数据
-    feishu.get_metainfo()
+    # Read all data in a worksheet
+    # print(feishu.get_values_batch("Zt2PGQ")[1][3])
+    # print(len(feishu.get_values_batch("SdCHOM")))
 
     # # 看一看+工作表,插入首行
-    # print(feishu.insert_columns("SdCHOM"))
-    #
-    # # 看一看+工作表,首行写入数据
-    # print(feishu.update_values("SdCHOM", "a1", "b1", "c1", "d1", "e1", "f1", "g1",
-    #                            "h1", "i1", "j1", "k1", "l1", "m1", "n1", "o1"))
+    # print(feishu.insert_columns("Y8N3Vl", "ROWS", 1, 2))
 
     # # 查询单元格内容
-    # print(feishu.get_range_value("SdCHOM", "B8:C8"))
+    # print(feishu.get_range_value("Y8N3Vl", "B8:C8"))
     #
     # # 删除行或列,可选 ROWS、COLUMNS
-    # feishu.dimension_range("SdCHOM", "ROWS")
+    # feishu.dimension_range("Y8N3Vl", "ROWS")
 
     pass

+ 32 - 21
main/get_feeds.py

@@ -43,10 +43,11 @@ def get_feeds():
     3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 中去重
     4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
     """
+    Common.logger().info("开始从推荐页获取视频列表")
     host = "https://search.weixin.qq.com"
     url = '/cgi-bin/recwxa/recwxavideolist?'
     video_list_session = Common.get_session()
-    Common.logger().info("获取视频list时,session:{}", video_list_session)
+    # Common.logger().info("获取视频list时,session:{}", video_list_session)
     header = {
         "Connection": "keep-alive",
         "content-type": "application/json",
@@ -98,7 +99,8 @@ def get_feeds():
                         .replace(":", "").replace("*", "").replace("?", "")\
                         .replace("?", "").replace('"', "").replace("<", "")\
                         .replace(">", "").replace("|", "").replace(" ", "")\
-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "")
+                        .replace("&NBSP", "").replace(".", "。").replace(" ", "")\
+                        .replace("小年糕", "")
                     Common.logger().info('视频标题:{}', video_title)
 
                     # 获取视频ID
@@ -148,11 +150,17 @@ def get_feeds():
                     # 获取视频发布时间
                     video_send_date = items[i]["date"]
                     Common.logger().info("视频发布时间:{}",
-                                         time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date)))
+                                         time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_send_date)))
                 
                     # 获取视频用户名
                     video_user = items[i]["source"].strip().replace("\n", "")
                     Common.logger().info('视频用户名:{}', video_user)
+
+                    # user_id
+                    if "openid" not in items[i]:
+                        user_id = 0
+                    else:
+                        user_id = items[i]["openid"]
                 
                     # 获取视频用户头像
                     video_user_cover = items[i]["bizIcon"]
@@ -208,27 +216,30 @@ def get_feeds():
                         Common.logger().info("该视频未下载,添加至kanyikan_feeds:{}", video_title)
 
                         # 看一看+工作表,插入首行
-                        Feishu.insert_columns("SdCHOM")
+                        Feishu.insert_columns("SdCHOM", "ROWS", 1, 2)
 
                         # 获取当前时间
                         get_feeds_time = int(time.time())
-                        # 看一看云文档,工作表 kanyikan_feeds 中写入数据
-                        Feishu.update_values("SdCHOM",
-                                             a1=str(get_feeds_time),
-                                             b1=str(video_id),
-                                             c1=str(video_play_cnt),
-                                             d1=str(video_title),
-                                             e1=str(video_duration),
-                                             f1=str(video_comment_cnt),
-                                             g1=str(video_liked_cnt),
-                                             h1=str(video_shared_cnt),
-                                             i1=str(video_resolution),
-                                             j1=str(video_send_date),
-                                             k1=str(video_user),
-                                             l1=str(video_user_cover),
-                                             m1=str(video_cover),
-                                             n1=str(url),
-                                             o1=str(video_list_session))
+                        # 准备写入云文档的数据
+                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time)),
+                                   "推荐榜",
+                                   video_id,
+                                   video_title,
+                                   video_play_cnt,
+                                   video_comment_cnt,
+                                   video_liked_cnt,
+                                   video_shared_cnt,
+                                   video_duration,
+                                   str(video_width) + "*" + str(video_height),
+                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_send_date)),
+                                   video_user,
+                                   user_id,
+                                   video_user_cover,
+                                   video_cover,
+                                   url]]
+                        time.sleep(1)
+                        # 写入数据
+                        Feishu.update_values("SdCHOM", "A2:P2", values)
     except Exception as e:
         Common.logger().error("获取视频 list 时异常:{}", e)
 

+ 0 - 49
抓取规则.txt

@@ -1,49 +0,0 @@
-==========2022/4/21===========
-- 视频发布7日内,播放量大于1万(当前时间 - 发布时间 <= 7 天)
-- 任务执行规则:
-    1.凌晨0点-10点      7日内播放大于1万 爬取
-    2.早上10点-20点     内容上升榜 爬取
-    3.晚上20点-24点     15万播放爬取
-==============================
-
-
-==========2022/4/15===========
-- 视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
-- 视频时长1分钟以上,10分钟以下
-- 分辨率 宽或高大于720
-- 分享量>0
-- 站内标题=看一看视频原标题
-- 站内封面图=看一看视频原封面图
-- 任务执行规则:
-    1.凌晨0点-10点      3日内播放大于2万 爬取
-    2.早上10点-20点     内容上升榜 爬取
-    3.晚上20点-24点     15万播放爬取
-==============================
-
-
-==========2022/3/29===========
-1.凌晨5:00 - 21:00,跑上升榜爬虫(循环隔 1 小时,检查播放量>=1000)
-2.晚上21:00 - 5:00,跑播放量爬虫(播放量>=200000)
-3.视频下载后,立即上传
-==============================
-
-
-==========2022/3/24===========
-1.周一至周五,跑上升榜爬虫(循环隔 1 小时,检查播放量>=1000)
-2.周六至周日,跑播放量爬虫(播放量>=200000)
-3.视频下载后,立即上传
-4.下周一时,暂停播放量爬虫,恢复上升榜爬虫
-==============================
-
-
-==========2022/3/15===========
-1.拿到外网视频 list
-2.去重
-3.获取当前抓取时间、以及播放量、加上基本规则,存储本地:
-	3.1 分辨率,宽或者高 >= 720 或 无分辨
-	3.2 600 >= 时长 >= 60
-4.循环隔 1 小时,检查播放量 >=1000(当前播放量 - 1 小时前的播放量),开始抓取。同时从本地存储中删除,加入到去重文本中。
-5.下载总条数,先不限制
-6.下载时间:早上 8 点 - 晚上 21 点截止
-7.下载完成后立即上传
-==============================

Một số tệp không được hiển thị vì có quá nhiều tệp thay đổi trong diff này