浏览代码

update download rule

wangkun 3 年之前
父节点
当前提交
6a7cd08dd1
共有 9 个文件被更改,包括 398 次插入和 432 次删除
  1. 9 1
      README.md
  2. 2 27
      main/common.py
  3. 32 13
      main/demo.py
  4. 288 305
      main/download_kuaishou.py
  5. 33 42
      main/feishu_lib.py
  6. 3 3
      main/publish.py
  7. 8 40
      main/run.py
  8. 4 0
      requirements.txt
  9. 19 1
      抓取规则.txt

+ 9 - 1
README.md

@@ -1 +1,9 @@
-快手和微视小程序的爬虫
+快手和微视小程序的爬虫
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9
+python==3.10.0
+执行入口:
+1. cd ./crawler-kuaishou-Windows
+2. python3 main/run.py 

+ 2 - 27
main/common.py

@@ -7,7 +7,6 @@
 from datetime import date, timedelta
 from loguru import logger
 import datetime
-import logging
 import os
 import time
 import requests
@@ -20,34 +19,11 @@ class Common:
     # 统一获取当前时间 <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
     now = datetime.datetime.now()
     # 昨天 <class 'str'>  2022-04-13
-    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y/%m/%d")
     # 今天 <class 'datetime.date'>  2022-04-14
     today = date.today()
     # 明天 <class 'str'>  2022-04-15
-    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
-
-    # 使用 logging 模块生成日志
-    @staticmethod
-    def crawler_log():
-        """
-        生成 log 日志
-        """
-        # 日志路径
-        log_dir = r"./logs/"
-        log_path = os.getcwd() + os.sep + log_dir
-        if not os.path.isdir(log_path):
-            os.makedirs(log_path)
-
-        # 日志参数
-        log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-        date_format = "%Y-%m-%d %p %H:%M:%S"
-        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
-
-        # 日志初始化
-        logging.basicConfig(filename=log_path + log_name, level=logging.INFO, format=log_format, datefmt=date_format)
-        crawler_logger = logging.getLogger("crawler-log")
-
-        return crawler_logger
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y/%m/%d")
 
     # 使用 logger 模块生成日志
     @staticmethod
@@ -111,7 +87,6 @@ class Common:
         video_dir = "./videos/" + d_name + "/"
         if not os.path.exists(video_dir):
             os.mkdir(video_dir)
-        cls.logger().info("删除 charles 缓存文件成功")
 
         # 下载视频
         if text == "video":

+ 32 - 13
main/demo.py

@@ -11,6 +11,8 @@ import time
 import requests
 import urllib3
 
+from main.common import Common
+
 
 class Demo:
     @classmethod
@@ -265,20 +267,37 @@ class Demo:
         logger.add(log_dir+log_name, level="INFO", rotation='13:56')
         return logger
 
+    @classmethod
+    def download_video(cls):
+        video_url = "https://v2-h5.kwaicdn.com/ksc1/JEq3hgQmbaJq_sHzHNQnYGPNJ5XRHnUJ" \
+                    "eT1nP2eHeVEnOkXxfdBGpkvLy9ZCTSLMjiRJqiUftrJgi5qZrrz-ROSGhNwhUHgX2Z" \
+                    "u4XgAqNZS5Kp0p3GzFdsB9jgpH3VBtmBVwq4l8Or0YZV6MySJmh6mbVPqePrLdeQ9GxY" \
+                    "yvMbj4Ds2Z9yXQ3X9knED5xafh.mp4?clientCacheKey=3xkisjhn2tnwf5w_b.mp4&tt" \
+                    "=b&di=de8075d0&bp=60000&pkey=AAWdtvPdTRXTA9T3NIXmzYfB0em4XZCTrPJZkmFerVrc" \
+                    "RFtBbzkvz2ZUVuiUJK3VrRt3w2_cn2y7tQr-dpqkeM7HInHNkaGnDbnFXe530Zm65RfEdzjzNx_ErSTX5YonXTg"
+        Common.download_method("video", "video", video_url)
+
+    @classmethod
+    def time2(cls):
+        time1 = "2022/06/01 18:50:12"
+        timeStamp = int(time.mktime(time.strptime(time1, "%Y/%m/%d %H:%M:%S")))
+        print(timeStamp)
+
 
 if __name__ == "__main__":
-    while True:
-        demo = Demo()
-        # demo.demo1()
-        # demo.time()
-        # demo.get_douyin_feeds()
-        # demo.demo2()
-        # demo.get_weishi_feeds()
-        # demo.edit_str()
-        # demo.sensitive_words()
-        demo.logger().info("hello")
-        time.sleep(10)
-        a = "hahaha"
-        demo.logger().exception("what:{}", a)
+    demo = Demo()
+    demo.time2()
+    # demo.download_video()
+    # demo.demo1()
+    # demo.time()
+    # demo.get_douyin_feeds()
+    # demo.demo2()
+    # demo.get_weishi_feeds()
+    # demo.edit_str()
+    # demo.sensitive_words()
+    # demo.logger().info("hello")
+    # time.sleep(10)
+    # a = "hahaha"
+    # demo.logger().exception("what:{}", a)
 
     # pass

+ 288 - 305
main/download_kuaishou.py

@@ -5,10 +5,12 @@
 从 微信小程序-快手短视频 中,下载符合规则的视频
 """
 import json
+import os
+import sys
 import time
-
 import requests
 import urllib3
+sys.path.append(os.getcwd())
 from main.common import Common
 from main.feishu_lib import Feishu
 from main.publish import Publish
@@ -19,13 +21,26 @@ proxies = {"http": None, "https": None}
 class KuaiShou:
     # 已下载视频列表
     download_video_list = []
+    # 配置微信号
+    Referer = Feishu.get_range_value("f1R7Mx", "C3:C3")[0][0]["link"]
+    NS_sig3 = Feishu.get_range_value("f1R7Mx", "C4:C4")[0]
+    NS_sig3_origin = Feishu.get_range_value("f1R7Mx", "C5:C5")[0]
+    did = Feishu.get_range_value("f1R7Mx", "C6:C6")[0]
+    session_key = Feishu.get_range_value("f1R7Mx", "C7:C7")[0]
+    unionid = Feishu.get_range_value("f1R7Mx", "C8:C8")[0]
+    eUserStableOpenId = Feishu.get_range_value("f1R7Mx", "C9:C9")[0]
+    openId = Feishu.get_range_value("f1R7Mx", "C10:C10")[0]
+    eOpenUserId = Feishu.get_range_value("f1R7Mx", "C11:C11")[0]
+    kuaishou_wechat_app_st = Feishu.get_range_value("f1R7Mx", "C12:C12")[0]
+    passToken = Feishu.get_range_value("f1R7Mx", "C13:C13")[0]
+    userId = Feishu.get_range_value("f1R7Mx", "C14:C14")[0]
 
     @classmethod
-    def kuaishou_sensitive_words(cls):
+    def sensitive_words(cls):
         # 敏感词库列表
         word_list = []
         # 从云文档读取所有敏感词,添加到词库列表
-        lists = Feishu.get_values_batch("rDYi82")
+        lists = Feishu.get_values_batch("fn8IDi")
         for i in lists:
             for j in i:
                 # 过滤空的单元格内容
@@ -50,9 +65,9 @@ class KuaiShou:
         """
         if 600 >= int(float(d_duration)) >= 60:
             if int(d_width) >= 720 or int(d_height) >= 720:
-                if int(d_play_cnt) >= 5:
-                    if int(d_like_cnt) >= 30000:
-                        if int(d_share_cnt) >= 1000:
+                if int(d_play_cnt) >= 50000:
+                    if int(d_like_cnt) >= 50000:
+                        if int(d_share_cnt) >= 2000:
                             return True
                         else:
                             return False
@@ -64,21 +79,28 @@ class KuaiShou:
         return False
 
     @classmethod
-    def kuaishou_get_recommend(cls):
+    def get_feeds(cls):
         """
         1.从快手小程序首页推荐,获取视频列表
-        2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e 中去重
-        3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7 中去重
-        4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
+        2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c 中去重
+        3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ 中去重
+        4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ
         """
         url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
+        headers = {
+            "content-type": "application/json",
+            "Accept-Encoding": "gzip,compress,br,deflate",
+            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
+                          ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
+            "Referer": str(cls.Referer),
+        }
         params = {
-            "__NS_sig3": "e6f6b281ea31e3d7d1bbb8b91f662576fc25f7c3a7a7a5a5aaaba8b2",
-            "__NS_sig3_origin": "3sCt3iAAAAAAAAAAAAAAAwEQBv2b8ewCwkZKaiAAAAAPg0soi"
-                                "e7GiOlU vF4zPrG1Nl6xvaoBgFd3MwTzOed9w=="
+            "__NS_sig3": str(cls.NS_sig3),
+            "__NS_sig3_origin": str(cls.NS_sig3_origin)
         }
         cookies = {
-            "did": "wxo_05f915ac6b1deca87db36cea1a0fd18fae6c",
+            "did": str(cls.did),
             "preMinaVersion": "v3.109.0",
             "sid": "kuaishou.wechat.app",
             "appId": "ks_wechat_small_app_2",
@@ -87,311 +109,270 @@ class KuaiShou:
             "kpn": "WECHAT_SMALL_APP",
             "kpf": "OUTSIDE_ANDROID_H5",
             "language": "zh_CN",
-            "smallAppVersion": "v3.109.0",
-            "session_key": "123005bcc551a92aac29cdb96190251c9f492c29d4ba6c502dc"
-                           "0d2f8b8d18df356a2f7a22d6924d1dd34b8554a64af49b1bb1a"
-                           "1236cd2f69c25d4ac2a2531ebcd28c179da14b222023f9e111c"
-                           "c4d3b064ac7b0915d8c9fdaccb59e4048e96a5c38a32b2ce9f4abf628053001",
-            "unionid": "V2:1230b56c8337908c3eecba63142a58daca05535c1f14bf67d3d8"
-                       "85cace91a7db335c5572d204762d075f24aa84412e2955711a12bb9"
-                       "2bd9c2290489ba7a733708a4a446de83822205ab727650489dda0db"
-                       "9d2a226c5ddb66d88a1f1373283a3d3b959611d816660028053001",
-            "eUserStableOpenId": "12303325e8710eb802137c70fd1fb65997a4e5e33d82"
-                                 "cddd409d335d096e20873e07ee472090133bc7a67e5c"
-                                 "749da045d9a31a12da4c4c26181d432b873ec39432f4"
-                                 "10196c6c2220323d0e6b562d1b3786aefb352b4e509c"
-                                 "d96f3466b7b2e5e74b904a94c40792d928053001",
-            "openId": "o5otV45DcV1EUsWw4fAUk_iq0YSA",
-            "eOpenUserId": "124074b7726c996283f25044a42e2c7427e929cd6d968c5342"
-                           "330e61fc8939e57b0da4ffe21887f3abc8784175f73e1a267d"
-                           "671247273806f293f64c9c8c2adc00a21a12bb92bd9c229048"
-                           "9ba7a733708a4a446de8382220534aa79c69b74866bb09187e"
-                           "eceec880fa1e0fa421b7df8b3289dab603b17c4828053001",
-            "kuaishou.wechat.app_st": "ChZrdWFpc2hvdS53ZWNoYXQuYXBwLnN0ErAB8aO"
-                                      "EcB6jh4CMSJ-p_4BJFCId0PKNa_5IeFfeV_tj7q"
-                                      "CjdXK0y13CSte6-KHbNK9BPo6Rjy3OGny0sh4Zb"
-                                      "5AUl3Q_zqVXe2TunW7_F3nlTdJOdZ6iVIhPrHa1"
-                                      "CM0Y-cG9gS4FDDzTvejfWaTI0CbjfNN0RZXzYVE"
-                                      "AUVT_BNgUVDtYBbEY792gPylMfXxwxKMSzkhaDe"
-                                      "eaHkGCWUj62FGCFYQ9Fw2W3d7suCXFsNylqT4aE"
-                                      "s8oNwmycUiygfvfKuoXlHkbeSIgOhEFMZ3ArImS"
-                                      "vFY_OwLJDHak1iXRO8g5TwzHTvBT3WcoBTAB",
-            "passToken": "ChNwYXNzcG9ydC5wYXNzLXRva2VuEpABI42IhPCJHfFngXC3i-vF"
-                         "3daRTB-EtnAYyE6HpfWcPoZ6VSRDvKrom_RvltQ2zKk1T3_FJteb"
-                         "mv7ZzQLD7IicnTypaGoeflb7KQVrAv50Mp_JL4ObfBu_xTiwI53t"
-                         "bTlM6iML0G7DFd16K5z0jZZ1xECKVQQbk_vIqnseUujFIWAsKcDz"
-                         "BqqfnQNbUU5DzDUkGhKgKyzmNjRDxLfpDU5SPFhJmG0iIGBZ_Vd-"
-                         "7eT8i_Xit9ZPM-zdFpnRZFveFE9iplMg8Z06KAUwAQ",
-            "userId": "2845397958"
+            "smallAppVersion": "v3.114.0",
+            "session_key": str(cls.session_key),
+            "unionid": str(cls.unionid),
+            "eUserStableOpenId": str(cls.eUserStableOpenId),
+            "openId": str(cls.openId),
+            "eOpenUserId": str(cls.eOpenUserId),
+            "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
+            "passToken": str(cls.passToken),
+            "userId": str(cls.userId)
         }
         json_data = {
-            "thirdPartyUserId": 2845397958,
-            "photoId": "5250352807040393911",
-            "forwardUserId": 2845397958,
             "count": 10,
-            "portal": 2,
+            "portal": 1,
             "pageType": 2,
-            "needLivestream": "true",
-            "extraRequestInfo": "{\"scene\":1074,\"fid\":\"2845397958\","
-                                "\"sharerUserId\":\"2845397958\",\"curPhotoIndex\":0,"
-                                "\"adShow\":true,\"weChatAd\":{},\"page\":0}",
+            "extraRequestInfo": "{\"scene\":1089,\"fid\":\"\",\"sharerUserId\":\"\",\"curPhotoIndex\":0,"
+                                "\"adShow\":true,\"weChatAd\":{},\"headurl\":\"https://js2.a.kwimgs.com/udata/pkg"
+                                "/fe/profiel_icon_photo_normal@3x.fb3ec1af.png\",\"page\":0}",
+            "needLivestream": True,
             "pcursor": 0,
             "sourceFrom": 2,
+            "thirdPartyUserId": int(cls.userId)
         }
 
         try:
             urllib3.disable_warnings()
-            r = requests.post(url=url, params=params, cookies=cookies, json=json_data, proxies=proxies, verify=False)
+            r = requests.post(url=url, headers=headers, params=params,
+                              cookies=cookies, json=json_data, proxies=proxies, verify=False)
             response = json.loads(r.content.decode("utf8"))
-            if "feeds" not in response:
-                Common.logger().warning("获取快手视频 list 出错:{},休眠 10s".format(response))
-                time.sleep(10)
-            else:
-                feeds = response["feeds"]
-                for i in range(len(feeds)):
-                    # 视频标题过滤话题及处理特殊字符
-                    kuaishou_title = feeds[i]["caption"]
-                    title_split1 = kuaishou_title.split(" #")
-                    if title_split1[0] != "":
-                        title1 = title_split1[0]
-                    else:
-                        title1 = title_split1[-1]
-
-                    title_split2 = title1.split(" #")
-                    if title_split2[0] != "":
-                        title2 = title_split2[0]
-                    else:
-                        title2 = title_split2[-1]
-
-                    title_split3 = title2.split("@")
-                    if title_split3[0] != "":
-                        title3 = title_split3[0]
-                    else:
-                        title3 = title_split3[-1]
-
-                    video_title = title3.strip().replace("\n", "") \
-                        .replace("/", "").replace("快手", "").replace(" ", "") \
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
-                        .replace("#", "").replace(".", "。").replace("\\", "") \
-                        .replace(":", "").replace("*", "").replace("?", "") \
-                        .replace("?", "").replace('"', "").replace("<", "") \
-                        .replace(">", "").replace("|", "")
-                    Common.logger().info("video_title:{}".format(video_title))
-
-                    if "photoId" not in feeds[i]:
-                        photo_id = "0"
-                        Common.logger().info("photo_id:{}".format(photo_id))
-                    else:
-                        photo_id = feeds[i]["photoId"]
-                        Common.logger().info("photo_id:{}".format(photo_id))
+            feeds = response["feeds"]
+            for i in range(len(feeds)):
+                # 视频标题过滤话题及处理特殊字符
+                kuaishou_title = feeds[i]["caption"]
+                title_split1 = kuaishou_title.split(" #")
+                if title_split1[0] != "":
+                    title1 = title_split1[0]
+                else:
+                    title1 = title_split1[-1]
 
-                    if "viewCount" not in feeds[i]:
-                        video_play_cnt = "0"
-                        Common.logger().info("video_play_cnt:0")
-                    else:
-                        video_play_cnt = feeds[i]["viewCount"]
-                        Common.logger().info("video_play_cnt:{}".format(video_play_cnt))
+                title_split2 = title1.split(" #")
+                if title_split2[0] != "":
+                    title2 = title_split2[0]
+                else:
+                    title2 = title_split2[-1]
 
-                    if "likeCount" not in feeds[i]:
-                        video_like_cnt = "0"
-                        Common.logger().info("video_like_cnt:0")
-                    else:
-                        video_like_cnt = feeds[i]["likeCount"]
-                        Common.logger().info("video_like_cnt:{}".format(video_like_cnt))
+                title_split3 = title2.split("@")
+                if title_split3[0] != "":
+                    title3 = title_split3[0]
+                else:
+                    title3 = title_split3[-1]
+
+                video_title = title3.strip().replace("\n", "") \
+                    .replace("/", "").replace("快手", "").replace(" ", "") \
+                    .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
+                    .replace("#", "").replace(".", "。").replace("\\", "") \
+                    .replace(":", "").replace("*", "").replace("?", "") \
+                    .replace("?", "").replace('"', "").replace("<", "") \
+                    .replace(">", "").replace("|", "")
+                Common.logger().info("video_title:{}".format(video_title))
+
+                if "photoId" not in feeds[i]:
+                    photo_id = "0"
+                    Common.logger().info("photo_id:{}".format(photo_id))
+                else:
+                    photo_id = feeds[i]["photoId"]
+                    Common.logger().info("photo_id:{}".format(photo_id))
 
-                    if "shareCount" not in feeds[i]:
-                        video_share_cnt = "0"
-                        Common.logger().info("video_share_cnt:0")
-                    else:
-                        video_share_cnt = feeds[i]["shareCount"]
-                        Common.logger().info("video_share_cnt:{}".format(video_share_cnt))
+                if "viewCount" not in feeds[i]:
+                    video_play_cnt = "0"
+                    Common.logger().info("video_play_cnt:0")
+                else:
+                    video_play_cnt = feeds[i]["viewCount"]
+                    Common.logger().info("video_play_cnt:{}".format(video_play_cnt))
 
-                    if "commentCount" not in feeds[i]:
-                        video_comment_cnt = "0"
-                        Common.logger().info("video_comment_cnt:0")
-                    else:
-                        video_comment_cnt = feeds[i]["commentCount"]
-                        Common.logger().info("video_comment_cnt:{}".format(video_comment_cnt))
+                if "likeCount" not in feeds[i]:
+                    video_like_cnt = "0"
+                    Common.logger().info("video_like_cnt:0")
+                else:
+                    video_like_cnt = feeds[i]["likeCount"]
+                    Common.logger().info("video_like_cnt:{}".format(video_like_cnt))
 
-                    if "duration" not in feeds[i]:
-                        video_duration = "0"
-                        Common.logger().info("video_duration:不存在")
-                    else:
-                        video_duration = int(int(feeds[i]["duration"]) / 1000)
-                        Common.logger().info("video_duration:{}秒".format(video_duration))
-
-                    if "width" not in feeds[i] or "height" not in feeds[i]:
-                        video_width = "0"
-                        video_height = "0"
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.logger().info("无分辨率")
-                    else:
-                        video_width = feeds[i]["width"]
-                        video_height = feeds[i]["height"]
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.logger().info("video_resolution:{}".format(video_resolution))
-
-                    if "timestamp" not in feeds[i]:
-                        video_send_time = "0"
-                        Common.logger().info("video_send_time:不存在")
-                    else:
-                        video_send_time = feeds[i]["timestamp"]
-                        Common.logger().info("video_send_time:{}".format(
-                            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
+                if "shareCount" not in feeds[i]:
+                    video_share_cnt = "0"
+                    Common.logger().info("video_share_cnt:0")
+                else:
+                    video_share_cnt = feeds[i]["shareCount"]
+                    Common.logger().info("video_share_cnt:{}".format(video_share_cnt))
 
-                    user_name = feeds[i]["userName"].strip().replace("\n", "") \
-                        .replace("/", "").replace("快手", "").replace(" ", "") \
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                    Common.logger().info("user_name:{}".format(user_name))
+                if "commentCount" not in feeds[i]:
+                    video_comment_cnt = "0"
+                    Common.logger().info("video_comment_cnt:0")
+                else:
+                    video_comment_cnt = feeds[i]["commentCount"]
+                    Common.logger().info("video_comment_cnt:{}".format(video_comment_cnt))
 
-                    user_id = feeds[i]["userId"]
-                    Common.logger().info("user_id:{}".format(user_id))
+                if "duration" not in feeds[i]:
+                    video_duration = "0"
+                    Common.logger().info("video_duration:不存在")
+                else:
+                    video_duration = int(int(feeds[i]["duration"]) / 1000)
+                    Common.logger().info("video_duration:{}秒".format(video_duration))
+
+                if "width" not in feeds[i] or "height" not in feeds[i]:
+                    video_width = "0"
+                    video_height = "0"
+                    video_resolution = str(video_width) + "*" + str(video_height)
+                    Common.logger().info("无分辨率")
+                else:
+                    video_width = feeds[i]["width"]
+                    video_height = feeds[i]["height"]
+                    video_resolution = str(video_width) + "*" + str(video_height)
+                    Common.logger().info("video_resolution:{}".format(video_resolution))
+
+                if "timestamp" not in feeds[i]:
+                    video_send_time = "0"
+                    Common.logger().info("video_send_time:不存在")
+                else:
+                    video_send_time = feeds[i]["timestamp"]
+                    Common.logger().info("video_send_time:{}".format(
+                        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
 
-                    if "headUrl" not in feeds[i]:
-                        head_url = "0"
-                        Common.logger().info("head_url:不存在")
-                    else:
-                        head_url = feeds[i]["headUrl"]
-                        Common.logger().info("head_url:{}".format(head_url))
+                user_name = feeds[i]["userName"].strip().replace("\n", "") \
+                    .replace("/", "").replace("快手", "").replace(" ", "") \
+                    .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                Common.logger().info("user_name:{}".format(user_name))
 
-                    if len(feeds[i]["coverUrls"]) == 0:
-                        cover_url = "0"
-                        Common.logger().info("cover_url:不存在")
-                    else:
-                        cover_url = feeds[i]["coverUrls"][0]["url"]
-                        Common.logger().info("cover_url:{}".format(cover_url))
+                user_id = feeds[i]["userId"]
+                Common.logger().info("user_id:{}".format(user_id))
 
-                    if len(feeds[i]["mainMvUrls"]) == 0:
-                        video_url = "0"
-                        Common.logger().info("video_url:不存在")
-                    else:
-                        video_url = feeds[i]["mainMvUrls"][0]["url"]
-                        Common.logger().info("video_url:{}".format(video_url))
-
-                    # 视频标题过滤话题及处理特殊字符
-                    kuaishou_title = feeds[i]["caption"]
-                    title_split1 = kuaishou_title.split(" #")
-                    if title_split1[0] != "":
-                        title1 = title_split1[0]
-                    else:
-                        title1 = title_split1[-1]
+                if "headUrl" not in feeds[i]:
+                    head_url = "0"
+                    Common.logger().info("head_url:不存在")
+                else:
+                    head_url = feeds[i]["headUrl"]
+                    Common.logger().info("head_url:{}".format(head_url))
 
-                    title_split2 = title1.split(" #")
-                    if title_split2[0] != "":
-                        title2 = title_split2[0]
-                    else:
-                        title2 = title_split2[-1]
+                if len(feeds[i]["coverUrls"]) == 0:
+                    cover_url = "0"
+                    Common.logger().info("cover_url:不存在")
+                else:
+                    cover_url = feeds[i]["coverUrls"][0]["url"]
+                    Common.logger().info("cover_url:{}".format(cover_url))
 
-                    title_split3 = title2.split("@")
-                    if title_split3[0] != "":
-                        title3 = title_split3[0]
-                    else:
-                        title3 = title_split3[-1]
-
-                    video_title = title3.strip().replace("\n", "")\
-                        .replace("/", "").replace("快手", "").replace(" ", "")\
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")\
-                        .replace("#", "").replace(".", "。").replace("\\", "")\
-                        .replace(":", "").replace("*", "").replace("?", "")\
-                        .replace("?", "").replace('"', "").replace("<", "")\
-                        .replace(">", "").replace("|", "")
-
-                    Common.logger().info("video_title:{}".format(video_title))
-
-                    # 过滤无效视频
-                    if photo_id == "0" \
-                            or head_url == "0" \
-                            or cover_url == "0"\
-                            or video_url == "0"\
-                            or video_duration == "0"\
-                            or video_send_time == "0"\
-                            or user_name == ""\
-                            or video_title == "":
-                        Common.logger().info("无效视频")
-                    # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e
-                    elif photo_id in [j for i in Feishu.get_values_batch("kCSk2e") for j in i]:
-                        Common.logger().info("该视频已下载:{}", video_title)
-                    # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
-                    elif photo_id in [j for i in Feishu.get_values_batch("YWeCh7") for j in i]:
-                        Common.logger().info("该视频已在feeds中:{}", video_title)
-                    # 判断敏感词
-                    elif any(word if word in kuaishou_title else False
-                                   for word in cls.kuaishou_sensitive_words()) is True:
-                        Common.logger().info("视频已中敏感词:{}".format(kuaishou_title))
-                    else:
-                        Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
-                        # feeds工作表,插入首行
-                        Feishu.insert_columns("YWeCh7")
-
-                        # 获取当前时间
-                        get_feeds_time = int(time.time())
-                        # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
-                        Feishu.update_values("YWeCh7",
-                                             a1=str(get_feeds_time),
-                                             b1=str(photo_id),
-                                             c1=str(video_play_cnt),
-                                             d1=str(video_title),
-                                             e1=str(video_duration),
-                                             f1=str(video_comment_cnt),
-                                             g1=str(video_like_cnt),
-                                             h1=str(video_share_cnt),
-                                             i1=str(video_resolution),
-                                             j1=str(video_send_time),
-                                             k1=str(user_name),
-                                             l1=str(head_url),
-                                             m1=str(cover_url),
-                                             n1=str(video_url),
-                                             o1=str("wxo_b07ba02ad4340205d89b47c76030bb090977"))
+                if len(feeds[i]["mainMvUrls"]) == 0:
+                    video_url = "0"
+                    Common.logger().info("video_url:不存在")
+                else:
+                    video_url = feeds[i]["mainMvUrls"][0]["url"]
+                    Common.logger().info("video_url:{}".format(video_url))
+
+                # 过滤无效视频
+                if photo_id == "0" \
+                        or head_url == "0" \
+                        or cover_url == "0" \
+                        or video_url == "0" \
+                        or video_duration == "0" \
+                        or video_send_time == "0" \
+                        or user_name == "" \
+                        or video_title == "":
+                    Common.logger().info("无效视频")
+                # 判断敏感词
+                elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
+                    Common.logger().info("视频已中敏感词:{}".format(kuaishou_title))
+                # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
+                elif photo_id in [j for i in Feishu.get_values_batch("3b207c") for j in i]:
+                    Common.logger().info("该视频已下载:{}", video_title)
+                # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ
+                elif photo_id in [j for i in Feishu.get_values_batch("Zt2PGQ") for j in i]:
+                    Common.logger().info("该视频已在feeds中:{}", video_title)
+                else:
+                    Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
+                    # feeds工作表,插入首行
+                    time.sleep(1)
+                    Feishu.insert_columns("Zt2PGQ", "ROWS", 1, 2)
+
+                    # 获取当前时间
+                    get_feeds_time = int(time.time())
+                    # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
+                    values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
+                               photo_id,
+                               video_title,
+                               video_play_cnt,
+                               video_comment_cnt,
+                               video_like_cnt,
+                               video_share_cnt,
+                               video_duration,
+                               video_resolution,
+                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
+                               user_name,
+                               user_id,
+                               head_url,
+                               cover_url,
+                               video_url]]
+                    # 等待 1s,防止操作云文档太频繁,导致报错
+                    time.sleep(1)
+                    Feishu.update_values("Zt2PGQ", "A2:P2", values)
         except Exception as e:
             Common.logger().error("获取视频 list 异常:{}".format(e))
 
     @classmethod
-    def kuaishou_download_play_video(cls, env):
+    def download_publish(cls, env):
         """
-        1.从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7 中读取视频信息
+        1.从 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ 中读取视频信息
         2.下载并上传符合规则的视频
         测试环境:env == dev
         正式环境:env == prod
         """
         try:
-            if len(Feishu.get_values_batch("YWeCh7")) == 1:
+            if len(Feishu.get_values_batch("Zt2PGQ")) == 1:
                 pass
             else:
-                for i in range(len(Feishu.get_values_batch("YWeCh7"))):
+                for i in range(1, len(Feishu.get_values_batch("Zt2PGQ")) + 1):
                     time.sleep(1)
                     try:
-                        download_photo_id = Feishu.get_values_batch("YWeCh7")[i + 1][1]
-                        download_video_play_cnt = Feishu.get_values_batch("YWeCh7")[i + 1][2]
-                        download_video_title = Feishu.get_values_batch("YWeCh7")[i + 1][3]
-                        download_video_duration = Feishu.get_values_batch("YWeCh7")[i + 1][4]
-                        download_video_comment_cnt = Feishu.get_values_batch("YWeCh7")[i + 1][5]
-                        download_video_like_cnt = Feishu.get_values_batch("YWeCh7")[i + 1][6]
-                        download_video_share_cnt = Feishu.get_values_batch("YWeCh7")[i + 1][7]
-                        download_video_resolution = Feishu.get_values_batch("YWeCh7")[i + 1][8]
+                        download_photo_id = Feishu.get_values_batch("Zt2PGQ")[i][1]
+                        # Common.logger().info("download_photo_id:{}", download_photo_id)
+                        download_video_title = Feishu.get_values_batch("Zt2PGQ")[i][2]
+                        # Common.logger().info("download_video_title:{}", download_video_title)
+                        download_video_play_cnt = Feishu.get_values_batch("Zt2PGQ")[i][3]
+                        # Common.logger().info("download_video_play_cnt:{}", download_video_play_cnt)
+                        download_video_comment_cnt = Feishu.get_values_batch("Zt2PGQ")[i][4]
+                        # Common.logger().info("download_video_comment_cnt:{}", download_video_comment_cnt)
+                        download_video_like_cnt = Feishu.get_values_batch("Zt2PGQ")[i][5]
+                        # Common.logger().info("download_video_like_cnt:{}", download_video_like_cnt)
+                        download_video_share_cnt = Feishu.get_values_batch("Zt2PGQ")[i][6]
+                        # Common.logger().info("download_video_share_cnt:{}", download_video_share_cnt)
+                        download_video_duration = Feishu.get_values_batch("Zt2PGQ")[i][7]
+                        # Common.logger().info("download_video_duration:{}", download_video_duration)
+                        download_video_resolution = Feishu.get_values_batch("Zt2PGQ")[i][8]
+                        # Common.logger().info("download_video_resolution:{}", download_video_resolution)
                         download_video_width = download_video_resolution.split("*")[0]
                         download_video_height = download_video_resolution.split("*")[-1]
-                        download_video_send_time = Feishu.get_values_batch("YWeCh7")[i + 1][9]
-                        download_user_name = Feishu.get_values_batch("YWeCh7")[i + 1][10]
-                        download_head_url = Feishu.get_values_batch("YWeCh7")[i + 1][11]
-                        download_cover_url = Feishu.get_values_batch("YWeCh7")[i + 1][12]
-                        download_video_url = Feishu.get_values_batch("YWeCh7")[i + 1][13]
-                        download_video_session = Feishu.get_values_batch("YWeCh7")[i + 1][14]
-
+                        download_video_send_time = Feishu.get_values_batch("Zt2PGQ")[i][9]
+                        # Common.logger().info("download_video_send_time:{}", download_video_send_time)
+                        download_user_name = Feishu.get_values_batch("Zt2PGQ")[i][10]
+                        # Common.logger().info("download_user_name:{}", download_user_name)
+                        download_user_id = Feishu.get_values_batch("Zt2PGQ")[i][11]
+                        # Common.logger().info("download_user_id:{}", download_user_id)
+                        download_head_url = Feishu.get_values_batch("Zt2PGQ")[i][12][0]["link"]
+                        # Common.logger().info("download_head_url:{}", download_head_url)
+                        download_cover_url = Feishu.get_values_batch("Zt2PGQ")[i][13][0]["link"]
+                        # Common.logger().info("download_cover_url:{}", download_cover_url)
+                        download_video_url = Feishu.get_values_batch("Zt2PGQ")[i][14][0]["link"]
+                        # Common.logger().info("download_video_url:{}", download_video_url)
+
+                        # 过滤空行
+                        if download_photo_id is None or download_video_title is None or download_video_play_cnt is None:
+                            Common.logger().warning("空行,略过")
+                        # 去重
+                        elif download_photo_id in [j for i in Feishu.get_values_batch("3b207c") for j in i]:
+                            Common.logger().info("该视频已下载:{}", download_video_title)
                         # 下载规则
-                        if download_photo_id not in [j for i in Feishu.get_values_batch("kCSk2e") for j in i]\
-                                and cls.kuaishou_download_rule(download_video_duration,
-                                                               download_video_width,
-                                                               download_video_height,
-                                                               download_video_play_cnt,
-                                                               download_video_like_cnt,
-                                                               download_video_share_cnt) is True:
+                        elif cls.kuaishou_download_rule(
+                                download_video_duration, download_video_width, download_video_height,
+                                download_video_play_cnt, download_video_like_cnt, download_video_share_cnt) is True:
                             Common.logger().info("开始下载快手视频:{}".format(download_video_title))
+
                             # 下载封面
-                            Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                            Common.download_method(text="cover",
+                                                   d_name=str(download_video_title), d_url=str(download_cover_url))
                             # 下载视频
-                            Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+                            Common.download_method(text="video",
+                                                   d_name=str(download_video_title), d_url=str(download_video_url))
                             # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                            with open(r"./videos/" + download_video_title
+                            with open("./videos/" + download_video_title
                                       + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
                                 f_a.write(str(download_photo_id) + "\n" +
                                           str(download_video_title) + "\n" +
@@ -401,12 +382,13 @@ class KuaiShou:
                                           str(download_video_like_cnt) + "\n" +
                                           str(download_video_share_cnt) + "\n" +
                                           str(download_video_resolution) + "\n" +
-                                          str(download_video_send_time) + "\n" +
+                                          str(int(time.mktime(
+                                              time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
                                           str(download_user_name) + "\n" +
                                           str(download_head_url) + "\n" +
                                           str(download_video_url) + "\n" +
                                           str(download_cover_url) + "\n" +
-                                          str(download_video_session))
+                                          str(cls.did))
                             Common.logger().info("==========视频信息已保存至info.txt==========")
 
                             # 添加视频 ID 到 list,用于统计当次下载总数
@@ -416,45 +398,45 @@ class KuaiShou:
                             Common.logger().info("开始上传视频:{}".format(download_video_title))
                             Publish.upload_and_publish(env, "play")
 
-                            # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=kCSk2e
+                            # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
                             Common.logger().info("保存视频ID至云文档:{}", download_video_title)
                             # 视频ID工作表,插入首行
-                            Feishu.insert_columns("kCSk2e")
+                            Feishu.insert_columns("3b207c", "ROWS", 1, 2)
                             # 视频ID工作表,首行写入数据
                             upload_time = int(time.time())
-                            Feishu.update_values("kCSk2e",
-                                                 str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time))),
-                                                 str(download_photo_id),
-                                                 str(download_video_play_cnt),
-                                                 str(download_video_title),
-                                                 str(download_video_duration),
-                                                 str(download_video_comment_cnt),
-                                                 str(download_video_like_cnt),
-                                                 str(download_video_share_cnt),
-                                                 str(download_video_resolution),
-                                                 str(time.strftime("%Y-%m-%d %H:%M:%S",
-                                                                   time.localtime(
-                                                                       int(download_video_send_time) / 1000))),
-                                                 str(download_user_name),
-                                                 str(download_head_url),
-                                                 str(download_cover_url),
-                                                 str(download_video_url),
-                                                 str(download_video_session))
-
-                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
+                            values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
+                                       str(download_photo_id),
+                                       str(download_video_title),
+                                       str(download_video_play_cnt),
+                                       str(download_video_comment_cnt),
+                                       str(download_video_like_cnt),
+                                       str(download_video_share_cnt),
+                                       str(download_video_duration),
+                                       str(download_video_resolution),
+                                       str(download_video_send_time),
+                                       str(download_user_name),
+                                       str(download_user_id),
+                                       str(download_head_url),
+                                       str(download_cover_url),
+                                       str(download_video_url)]]
+                            time.sleep(1)
+                            Feishu.update_values("3b207c", "A2:Q2", values)
+
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ
                             Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
                             # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("YWeCh7", "ROWS", i + 2, i + 2)
+                            time.sleep(1)
+                            Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
                         else:
-                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=YWeCh7
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ
                             Common.logger().info("该视频不满足下载规则,删除在云文档中的信息:{}", download_video_title)
                             # 删除行或列,可选 ROWS、COLUMNS
-                            Feishu.dimension_range("YWeCh7", "ROWS", i + 2, i + 2)
+                            Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
                     except Exception as e:
                         Common.logger().error("视频 info 异常,删除该视频信息", e)
                         # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("YWeCh7", "ROWS", i + 2, i + 2)
-                    cls.kuaishou_download_play_video("prod")
+                        Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
+                cls.download_publish("prod")
 
         except Exception as e:
             Common.logger().error(e)
@@ -462,4 +444,5 @@ class KuaiShou:
 
 if __name__ == "__main__":
     kuaishou = KuaiShou()
-    kuaishou.kuaishou_get_recommend()
+    kuaishou.get_feeds()
+    kuaishou.download_publish("dev")

+ 33 - 42
main/feishu_lib.py

@@ -2,6 +2,8 @@
 # @Author: wangkun
 # @Time: 2022/5/9
 import json
+import time
+
 import requests
 import urllib3
 
@@ -14,8 +16,8 @@ class Feishu:
     """
     编辑飞书云文档
     """
-    feishu_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
-    spreadsheetToken = "shtcngRPoDYAi24x52j2nDuHMih"
+    feishu_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
+    spreadsheetToken = "shtcnp4SaJt37q6OOOrYzPMjQkg"
 
     # 获取飞书api token
     @classmethod
@@ -30,6 +32,7 @@ class Feishu:
 
         try:
             urllib3.disable_warnings()
+            time.sleep(1)
             response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
             tenant_access_token = response.json()["tenant_access_token"]
             return tenant_access_token
@@ -82,7 +85,7 @@ class Feishu:
             # valueRenderOption=FormattedValue 计算并格式化单元格;
             # valueRenderOption=Formula单元格中含有公式时返回公式本身;
             # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
-            "valueRenderOption": "ToString",
+            "valueRenderOption": "FormattedValue",
 
             # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
             "dateTimeRenderOption": "",
@@ -92,6 +95,7 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
             response = json.loads(r.content.decode("utf8"))
             values = response["data"]["valueRanges"][0]["values"]
@@ -99,11 +103,15 @@ class Feishu:
         except Exception as e:
             Common.logger().error("读取工作表所有数据异常:{}", e)
 
-    # 工作表,插入
+    # 工作表,插入行或列
     @classmethod
-    def insert_columns(cls, sheetid):
+    def insert_columns(cls, sheetid, majordimension, startindex, endindex):
         """
-        插入行或列
+        工作表,插入行或列
+        :param sheetid: 哪张表
+        :param majordimension: 行或列,默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex: 开始的位置
+        :param endindex: 结束的位置
         :return:插入首行
         """
         url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
@@ -115,40 +123,28 @@ class Feishu:
         body = {
             "dimension": {
                 "sheetId": sheetid,
-                "majorDimension": "ROWS",  # 默认 ROWS ,可选 ROWS、COLUMNS
-                "startIndex": 1,  # 开始的位置
-                "endIndex": 2  # 结束的位置
+                "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                "startIndex": startindex,  # 开始的位置
+                "endIndex": endindex  # 结束的位置
             },
             "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("插入空行:{}", r.json()["msg"])
+            Common.logger().info("插入空行或列:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("插入空行异常:{}", e)
+            Common.logger().error("插入空行或列异常:{}", e)
 
-    # 工作表,首行写入数据
+    # 工作表,写入数据
     @classmethod
-    def update_values(cls, sheetid, a1, b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1):
+    def update_values(cls, sheetid, ranges, values):
         """
         写入数据
         :param sheetid: 哪张工作表
-        :param a1: 单元格
-        :param b1: 单元格
-        :param c1: 单元格
-        :param d1: 单元格
-        :param e1: 单元格
-        :param f1: 单元格
-        :param g1: 单元格
-        :param h1: 单元格
-        :param i1: 单元格
-        :param j1: 单元格
-        :param k1: 单元格
-        :param l1: 单元格
-        :param m1: 单元格
-        :param n1: 单元格
-        :param o1: 单元格
+        :param ranges: 单元格范围
+        :param values: 更新值
         :return:
         """
 
@@ -160,19 +156,17 @@ class Feishu:
         body = {
             "valueRanges": [
                 {
-                    "range": sheetid + "!A2:O2",
-                    "values": [
-                        [a1, b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1]
-                    ]
+                    "range": sheetid + "!" + ranges,
+                    "values": values
                 },
             ],
         }
         try:
             urllib3.disable_warnings()
             r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("空行写入视频数据:{}", r.json()["msg"])
+            Common.logger().info("写入数据:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("空行写入视频数据异常:{}", e)
+            Common.logger().error("写入数据异常:{}", e)
 
     # 读取单元格数据
     @classmethod
@@ -194,7 +188,7 @@ class Feishu:
             # valueRenderOption=FormattedValue 计算并格式化单元格;
             # valueRenderOption=Formula单元格中含有公式时返回公式本身;
             # valueRenderOption=UnformattedValue计算但不对单元格进行格式化。
-            "valueRenderOption": "ToString",
+            "valueRenderOption": "FormattedValue",
 
             # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
             "dateTimeRenderOption": "",
@@ -204,6 +198,7 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
+            time.sleep(0.5)
             r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
             return r.json()["data"]["valueRange"]["values"][0]
         except Exception as e:
@@ -244,13 +239,13 @@ class Feishu:
 if __name__ == "__main__":
     feishu = Feishu()
 
-    # # 获取飞书api token
-    # feishu.get_token()
+    # 获取飞书api token
+    print(feishu.get_token())
     # # 获取表格元数据
     # feishu.get_metainfo()
 
     # 读取工作表中所有数据
-    # print(feishu.get_values_batch("Y8N3Vl"))
+    # print(feishu.get_values_batch("Zt2PGQ")[1][3])
     # print(len(feishu.get_values_batch("SdCHOM")))
     # for i in range(len(feishu.get_values_batch("Y8N3Vl"))):
     #     videoid = feishu.get_values_batch("Y8N3Vl")[i][1]
@@ -261,10 +256,6 @@ if __name__ == "__main__":
 
     # # 看一看+工作表,插入首行
     # print(feishu.insert_columns("Y8N3Vl"))
-    #
-    # # 看一看+工作表,首行写入数据
-    # print(feishu.update_values("Y8N3Vl", "a1", "b1", "c1", "d1", "e1", "f1", "g1",
-    #                            "h1", "i1", "j1", "k1", "l1", "m1", "n1", "o1"))
 
     # # 查询单元格内容
     # print(feishu.get_range_value("Y8N3Vl", "B8:C8"))

+ 3 - 3
main/publish.py

@@ -36,9 +36,9 @@ class Publish:
         versionCode  版本 默认1
         :return:
         """
-        Common.logger().info('publish request data: {}'.format(request_data))
+        # Common.logger().info('publish request data: {}'.format(request_data))
         result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
-        Common.logger().info('publish result: {}'.format(result))
+        # Common.logger().info('publish result: {}'.format(result))
         if result['code'] != 0:
             Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
         else:
@@ -62,7 +62,7 @@ class Publish:
         :return:
         """
         result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
-        Common.logger().info('publish result: {}'.format(result))
+        # Common.logger().info('publish result: {}'.format(result))
         if result['code'] != 0:
             Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
         else:

+ 8 - 40
main/run.py

@@ -12,58 +12,25 @@ from main.common import Common
 from main.download_kuaishou import KuaiShou
 
 
-def kuaishou_dev_job():
-    """
-    执行测试环境快手脚本
-    """
-    while True:
-        # 当天下载及上传的视频数:20 条
-        if len(KuaiShou.download_video_list) >= 10:
-            Common.logger().info("已下载视频数:{}".format(len(KuaiShou.download_video_list)))
-            time.sleep(1800)
-        else:
-            Common.logger().info("开始抓取快手视频")
-            time.sleep(1)
-            # 抓取符合规则的视频,写入 kuaishou_feeds.txt
-            KuaiShou.kuaishou_get_recommend()
-            # 下载视频,并上传
-            KuaiShou.kuaishou_download_play_video("dev")
-            # 随机睡眠1-3s
-            time.sleep(random.randint(1, 3))
-
-        # 删除冗余日志
-        Common.del_logs()
-
-
-def main_dev():
-    while True:
-        while True:
-            main_time = datetime.datetime.now()
-            if main_time.hour >= 10:
-                kuaishou_dev_job()
-            else:
-                break
-
-
 def kuaishou_prod_job():
     """
     执行正式环境快手脚本
     """
     while True:
+        prod_time = datetime.datetime.now()
         # 当天下载及上传的视频数:150 条
-        if len(KuaiShou.download_video_list) >= 150:
+        if prod_time.hour > 22 or prod_time.hour < 8:
             Common.logger().info("已下载视频数:{}".format(len(KuaiShou.download_video_list)))
             time.sleep(60)
             # 删除冗余日志
             Common.del_logs()
+            KuaiShou.download_video_list = []
             break
         else:
-            Common.logger().info("开始抓取快手视频")
-            time.sleep(1)
             # 抓取符合规则的视频,写入 kuaishou_feeds.txt
-            KuaiShou.kuaishou_get_recommend()
+            KuaiShou.get_feeds()
             # 下载视频,并上传
-            KuaiShou.kuaishou_download_play_video("prod")
+            KuaiShou.download_publish("prod")
             # 随机睡眠1-3s
             time.sleep(random.randint(1, 3))
 
@@ -73,14 +40,15 @@ def main_prod():
     正式环境主函数
     """
     while True:
+        Common.logger().info("开始抓取快手视频")
+        time.sleep(1)
         while True:
             main_time = datetime.datetime.now()
-            if main_time.hour >= 10:
+            if 8 <= main_time.hour <= 22:
                 kuaishou_prod_job()
             else:
                 break
 
 
 if __name__ == "__main__":
-    # main_dev()
     main_prod()

+ 4 - 0
requirements.txt

@@ -0,0 +1,4 @@
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9

+ 19 - 1
抓取规则.txt

@@ -1,7 +1,25 @@
+==========2022/6/8===========
+一、按照关注账号进行抓取
+1、任务开始时间:
+- 每天早上8点-晚上22点
+2、抓取规则:
+  - 视频播放量点赞量5万+ ,分享量2000+
+  - 视频时长1分钟以上,10分钟以下
+  - 视频分辨率720以上
+  - 站内标题=快手视频原标题 (需要过滤掉标题中的话题#  #和@)
+  - 站内封面图=快手视频原封面图
+3、站内承接:
+- 每日入库100条视频(优先爬取最新达到标准的视频)
+- 视频随机分配到10个虚拟账号。uid列表:快手爬虫账号
+4、特别注意:
+- 视频需要排重,已经抓取过的视频,不要重复抓取
+- 需要对视频库进行持续扫描:如1条视频上周未达到5万+点赞,本周达到了5万点赞,则进行抓取。
+
+
 ==========2022/4/15===========
 一、按照数据指标抓取
 1、任务开始时间:
-- 每天早上8点-晚上21点
+- 每天早上8点-晚上22点
 2、抓取规则:
   - 视频播放量点赞量5万+ ,分享量2000+
   - 视频时长1分钟以上,10分钟以下