
Add: download videos from user profile pages

wangkun committed 3 years ago (commit bb058b0e67)
11 changed files with 1217 additions and 503 deletions
  1. README.md (+98 -2)
  2. main/common.py (+15 -30)
  3. main/demo.py (+81 -138)
  4. main/feishu_lib.py (+127 -83)
  5. main/follow_list.py (+676 -0)
  6. main/publish.py (+50 -39)
  7. main/recommend_list.py (+123 -136)
  8. main/run_follow.py (+33 -0)
  9. main/run_recommend.py (+14 -34)
  10. requirements.txt (+0 -4)
  11. 抓取规则.txt (+0 -37)

+ 98 - 2
README.md

@@ -1,9 +1,105 @@
 Crawlers for the Kuaishou and Weishi mini-programs
+
 loguru==0.6.0
+
 oss2==2.15.0
+
 requests==2.27.1
+
 urllib3==1.26.9
+
 python==3.10.0
+
+
 Entry point:
-1. cd ./crawler-kuaishou-Windows
-2. python3 main/run.py 
+
+1. cd ./crawler_kuaishou
+
+2. python3 main/run_xxx.py 
+
+
+==========2022/7/8===========
+
+1. Downloaded-videos sheet: added the in-site video link
+
+2. Merged scripts: recommend list / follow list
+
+3. Follow-list change: download from user profile pages
+
+4. Crawl/download rule: likes >= 10k or shares >= 1k (sketched as code after this section)
+
+5. Publish time: within the last 7 days
+
+
+==========2022/6/8===========
+
+I. Crawl by followed accounts
+
+1. Task start time:
+
+- Crawl in a loop, with a 1-hour interval between runs
+
+2. Crawl rules:
+
+  - Video published within the last 72 hours
+
+  - Play/like count 50k+, share count 2,000+
+
+  - Video length between 1 and 10 minutes
+
+  - Video resolution 720 or higher
+
+  - In-site title = original Kuaishou title (topics wrapped in # # and @mentions must be stripped)
+
+  - In-site cover = original Kuaishou cover image
+
+3. In-site intake:
+
+  - Store 100 videos per day (prioritize the newest videos that meet the bar)
+
+  - Distribute videos randomly across 10 virtual accounts. uid list: Kuaishou crawler accounts
+
+4. Notes:
+
+  - Deduplicate: never re-crawl a video that has already been crawled
+
+  - Keep rescanning the video pool: if a video was below 50k likes last week but reaches 50k this week, crawl it.
+
+
+==========2022/4/15===========
+
+I. Crawl by data metrics
+
+1. Task start time:
+
+- Every day, 08:00-22:00
+
+2. Crawl rules:
+
+  - Play/like count 50k+, share count 2,000+
+
+  - Video length between 1 and 10 minutes
+
+  - Video resolution 720 or higher
+
+  - In-site title = original Kuaishou title (topics wrapped in # # must be stripped)
+
+  - In-site cover = original Kuaishou cover image
+
+3. In-site intake:
+
+  - Store 200 videos per day
+
+  - Distribute videos randomly across 10 virtual accounts.
+
+4. Notes:
+
+  - Deduplicate: never re-crawl a video that has already been crawled
+
+  - Keep rescanning the video pool: if a video was below 50k likes last week but reaches 50k this week, crawl it.
+
+5. Added video-title filter words for the crawler (kept verbatim):
+
+  - 集结吧光合创作者、电影解说、快来露两手、分享家常美食教程、光合作者助手、创作者中心、创作者学院、娱乐星熠计划、解说电影、电影剪辑、放映室、老剧、影视剪辑、精彩片段、冬日影娱大作战、春日追剧计划单、影视解说、中视频影视混剪计划、众志成城共抗疫情、我在追好剧、娱乐星灿计划、电影、电视剧、毛泽东、毛主席、周恩来、林彪、习近平、习大大、彭丽媛、怀旧经典影视
+
+==============================
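
As a reading aid, here is a minimal sketch of the 2022/7/8 crawl/download rule as a standalone predicate. The helper name is hypothetical; the repo's own checks live in `download_rule` and the publish-time guard in main/follow_list.py.

```python
# Hypothetical helper illustrating the 2022/7/8 rule; not the repo's own code.
import time

def meets_crawl_rule(like_cnt: int, share_cnt: int, publish_ts: int) -> bool:
    """Likes >= 10k OR shares >= 1k, and published within the last 7 days."""
    within_seven_days = int(time.time()) - publish_ts <= 7 * 24 * 3600  # 604800 s
    return within_seven_days and (like_cnt >= 10_000 or share_cnt >= 1_000)

# Example: 12k likes, 200 shares, published 3 days ago -> True
print(meets_crawl_rule(12_000, 200, int(time.time()) - 3 * 24 * 3600))
```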

+ 15 - 30
main/common.py

@@ -27,7 +27,7 @@ class Common:
 
     # Generate logs with the logger module
     @staticmethod
-    def logger():
+    def logger(log_type):
         """
         Generate logs with the logger module
         """
@@ -38,7 +38,12 @@ class Common:
             os.makedirs(log_path)
 
         # Log file name
-        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
+        if log_type == "recommend":
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-kuaishou-recommend.log'
+        elif log_type == "follow":
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-kuaishou-follow.log'
+        else:
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-kuaishou.log'
 
         # Don't print logs to the console
         logger.remove(handler_id=None)
@@ -54,12 +59,12 @@ class Common:
 
     # Clean up logs, keeping the 7 most recent files
     @classmethod
-    def del_logs(cls):
+    def del_logs(cls, log_type):
         """
         Clean up redundant log files
         :return: keep the 7 most recent logs
         """
-        log_dir = r"./logs/"
+        log_dir = "./logs/"
         all_files = sorted(os.listdir(log_dir))
         all_logs = []
         for log in all_files:
@@ -72,11 +77,11 @@ class Common:
         else:
             for file in all_logs[:len(all_logs) - 7]:
                 os.remove(log_dir + file)
-        cls.logger().info("清除冗余日志成功")
+        cls.logger(log_type).info("清除冗余日志成功")
 
     # Helper method for downloading a video or a cover
     @classmethod
-    def download_method(cls, text, d_name, d_url):
+    def download_method(cls, log_type, text, d_name, d_url):
         """
         Download a cover: text == "cover" ; download a video: text == "video"
         d_name: title of the video/cover to download
@@ -102,9 +107,9 @@ class Common:
                 with open(video_dir + video_name, "wb") as f:
                     for chunk in response.iter_content(chunk_size=10240):
                         f.write(chunk)
-                cls.logger().info("==========视频下载完成==========")
+                cls.logger(log_type).info("==========视频下载完成==========")
             except Exception as e:
-                cls.logger().exception("视频下载失败:{}", e)
+                cls.logger(log_type).error("视频下载失败:{}", e)
 
         # Download the cover
         elif text == "cover":
@@ -119,29 +124,9 @@ class Common:
             try:
                 with open(video_dir + cover_name, "wb") as f:
                     f.write(response.content)
-                cls.logger().info("==========封面下载完成==========")
+                cls.logger(log_type).info("==========封面下载完成==========")
             except Exception as e:
-                cls.logger().exception("封面下载失败:{}", e)
-
-    # Read a txt file's content, returning f.readlines()
-    @staticmethod
-    def read_txt(t_name):
-        """
-        Read a txt file
-        :param t_name: file name
-        :return: file content
-        """
-        with open(r"./txt/" + t_name, "r", encoding="UTF-8") as f:
-            return f.readlines()
-
-    # Count the number of lines in the txt file
-    @classmethod
-    def kuaishou_download_count(cls):
-        videoid_path = r"./txt/kuaishou_videoid.txt"
-        count = 0
-        for count, line in enumerate(open(videoid_path, "rb").readlines()):
-            count += 1
-        cls.logger().info('累计下载视频数: {}\n', count)
+                cls.logger(log_type).error("封面下载失败:{}", e)
 
 
 if __name__ == "__main__":
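
For orientation, a condensed sketch of what the reworked `Common.logger(log_type)` amounts to, assuming loguru 0.6.0 as pinned in the README (the helper name here is illustrative, simplified from the diff above):

```python
# Condensed sketch of per-crawler log routing; helper name is illustrative.
import os
import time
from loguru import logger

def make_logger(log_type: str):
    os.makedirs("./logs", exist_ok=True)
    suffix = {"recommend": "-kuaishou-recommend", "follow": "-kuaishou-follow"}.get(log_type, "-kuaishou")
    log_name = time.strftime("%Y-%m-%d", time.localtime()) + suffix + ".log"
    logger.remove(handler_id=None)    # don't echo to the console
    logger.add("./logs/" + log_name)  # one file per day, per crawler type
    return logger

make_logger("follow").info("hello from the follow crawler")
```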

+ 81 - 138
main/demo.py

@@ -12,6 +12,7 @@ import requests
 import urllib3
 
 from main.common import Common
+from main.feishu_lib import Feishu
 
 
 class Demo:
@@ -50,125 +51,11 @@ class Demo:
         print(type(tomorrow))
         print(f"明天:{tomorrow}")
 
-    @classmethod
-    def get_douyin_feeds(cls):
-        """
-        Fetch Douyin feed videos https://www.douyin.com
-        """
-        url = "https://www.douyin.com/aweme/v1/web/tab/feed/?"
-        params = {
-            "device_platform": "webapp",
-            "aid": "6383",
-            "channel": "channel_pc_web",
-            "count": "10",
-            "refresh_index": "4",
-            "video_type_select": "0",
-            "version_code": "170400",
-            "version_name": "17.4.0",
-            "cookie_enabled": "true",
-            "screen_width": "1920",
-            "screen_height": "1080",
-            "browser_language": "zh-CN",
-            "browser_platform": "MacIntel",
-            "browser_name": "Chrome",
-            "browser_version": "99.0.4844.84",
-            "browser_online": "true",
-            "engine_name": "Blink",
-            "engine_version": "99.0.4844.84",
-            "os_name": "Mac OS",
-            "os_version": "10.15.7",
-            "platform": "PC",
-            "cpu_core_num": "8",
-            "device_memory": "8",
-            "downlink": "10",
-            "effective_type": "4g",
-            "round_trip_time": "50",
-            "msToken": "304uY1lV7HmHkR1G1QUaFqg0yrL5_WqrFOR8qCbl3hOsl8aSNI_18vIfpTGNhNRVZx7ysRiCHpcBKhpujTsbbC"
-                       "ZEDbG7pllZzlO3tlrBOs2TFYUgJdsvbw==",
-            "X-Bogus": "DFSzswVYPVsANat/Sl8eGc3WxM23",
-            "_signature": "qaJgTwAAy.aVqLslyfC7aKmiYF"
-        }
-        cookies = {
-            "_tea_utm_cache_6383": "undefined",
-            "ttwid": "1%7CETZk6sDMDSBgewWhKJXghFN4cwXTz0fLuhsLEngD_Nk%7C1648812136%7Cfa66fa81ccfe3f552f4"
-                     "e8b8327e72cbbc5e897141c25a5fcd32defaed1466d3e",
-            "passport_csrf_token": "e2d0f1ed9fd22463be9f389137a781ce",
-            "passport_csrf_token_default": "e2d0f1ed9fd22463be9f389137a781ce",
-            "s_v_web_id": "verify_l1h7nzwr_ABN0FA2f_BTrM_4zSH_8WPN_2KY2iZFmbhE2",
-            "_tea_utm_cache_1300": "undefined",
-            "_tea_utm_cache_2285": "undefined",
-            "ttcid": "3220eeda36a244beadd32a4b44d2044b31",
-            "douyin.com": "",
-            "__ac_nonce": "06247fb0f00f050ccc9b2",
-            "__ac_signature": "_02B4Z6wo00f01AN7DoAAAIDB5nv.qI7xGZQDWwoAAGKfo4rd5YCAYF8o5PyppIpsdKxV0k2NerO"
-                              "f1VEQr3eJftkpgon9tcveDVpmfY555vzTTvRznegS1ax3KJXnoav2ZdEoYzwR3wDszPCk5d",
-            "strategyABtestKey": "1648865029.449",
-            "AB_LOGIN_GUIDE_TIMESTAMP": "1648865029279",
-            "THEME_STAY_TIME": "299621",
-            "IS_HIDE_THEME_CHANGE": "1",
-            "home_can_add_dy_2_desktop": "0",
-            "tt_scid": "vUl8CBW1SMQp2l5GmUIja5A6ziY1LByrsoN.P-wvKuutiB8ftvlfK.9ZEeehNC5u821d",
-            "pwa_guide_count": "2",
-            "msToken": "EHCmp9Qw7PAChI3do-MQPjOR29hf4ZFLYNrGl89HkFKdO5Iwb8n7z5fpETrgim2zFTIkGT"
-                       "ObOxH7HCrHCLVEX5eAuwAS1A2sjKH4MHEfjfPqA06Lo4v9Pw==",
-        }
-        try:
-            urllib3.disable_warnings()
-            r = requests.get(url=url, params=params, cookies=cookies, verify=False)
-            # response = json.loads(r.content.decode("utf8"))
-            print(r)
-            print(type(r.text))
-            print(r.text)
-        except Exception as e:
-            print(e)
-
     @classmethod
     def demo2(cls):
         s = "0"
         print(int(int(s) / 10))
 
-    @classmethod
-    def get_weishi_feeds(cls):
-        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
-        cookies = {
-            "wesee_authtype": "3",
-            "wesee_openid":	"oWGa05FrwkuUvT-4n1qGeQuhVsc8",
-            "wesee_openkey": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf189e2a5c1d532eeff172bc21cf2"
-                             "6230941ccbc10243a7879e8165ca608c17060de606a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
-            "wesee_personid": "1593522421826902",
-            "wesee_refresh_token": "",
-            "wesee_access_token": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf18"
-                                  "9e2a5c1d532eeff172bc21cf26230941ccbc10243a7879e8165ca608c17060de6"
-                                  "06a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
-            "wesee_thr_appid": "wx75ee9f19b93e5c46",
-            "wesee_ichid": "8"
-        }
-        json_data = {
-            "req_body": {
-                "requestType": 16,
-                "isrefresh": 0,
-                "isfirst": 0,
-                "attachInfo": "",
-                "scene_id": 22,
-                "requestExt": {
-                    "mini_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
-                    "notLogin-personid": "1593522421826902"
-                }
-            },
-            "req_header": {
-                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
-            }
-        }
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(url=url, cookies=cookies, json=json_data, verify=False)
-            response = json.loads(r.content.decode("utf8"))
-            feeds = response["rsp_body"]["feeds"]
-            for feed in feeds:
-                print(feed)
-        except Exception as e:
-            print(e)
-
     @classmethod
     def edit_str(cls):
         title_list = ["#上海战疫 上海累计感染超20万!这条被淹没的热搜,令全网泪目… 疫情一定要攻克,但所有人都不该遗忘这些弱者。#上海累计报告本土阳性感染者超20万例 #农民工",
@@ -211,17 +98,6 @@ class Demo:
         # new_title = re.compile(r'(#)(.*)(#)')
         # print(new_title.sub(r'', title))
 
-    @classmethod
-    def kuaishou_sensitive_words(cls):
-        sensitive_words = [
-            "汽车",
-            "电影解说",
-            "放映室",
-            "解说电影",
-            "断供",
-        ]
-        return sensitive_words
-
     @classmethod
     def sensitive_words(cls):
         title_list = ["#上海战疫 上海累计感染超20万!这条被淹没的热搜,令全网泪目… 疫情一定要攻克,但所有人都不该遗忘这些弱者。#上海累计报告本土阳性感染者超20万例 #农民工",
@@ -283,21 +159,88 @@ class Demo:
         timeStamp = int(time.mktime(time.strptime(time1, "%Y/%m/%d %H:%M:%S")))
         print(timeStamp)
 
+    @classmethod
+    def get_sheet(cls):
+        sheet = Feishu.get_values_batch("follow", "kuaishou", "MGMsHE")
+        for i in range(1, len(sheet)):
+            uid = sheet[i][0]
+            print(uid)
+
+    # Mini-program: follow / unfollow a user
+    @classmethod
+    def follow_unfollow(cls, log_type, is_follow):
+        try:
+            # uids of the followed-users list
+            uid_sheet = Feishu.get_values_batch(log_type, "kuaishou", "MGMsHE")
+            for i in range(1, len(uid_sheet)):
+                uid = uid_sheet[i][0]
+                nick = uid_sheet[i][1]
+                url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/relation/follow?"
+                headers = {
+                    "content-type": "application/json",
+                    "Accept-Encoding": "gzip,compress,br,deflate",
+                    "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                                  ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
+                                  ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
+                    "Referer": "https://servicewechat.com/wx79a83b1a1e8a7978/595/page-frame.html"
+                }
+                params = {
+                    "__NS_sig3": "baaaeedda97f238b91e7e4e593b0bec8406b379ffbfbf9f9f6f7f4ee",
+                    "__NS_sig3_origin": "3sCt3iAAAAAAAAAAAAAAAwEQBv2b8ewCiEHD8iAAAAAJi0p4iunP2uUH /x7m67D394uxv2sVV0N0ZsfmbHMpA=="
+                }
+                cookies = {
+                    "did": "wxo_89af779e76d329da37ea78c277c8231d04c4",
+                    "preMinaVersion": "v3.109.0",
+                    "sid": "kuaishou.wechat.app",
+                    "appId": "ks_wechat_small_app_2",
+                    "clientid": "13",
+                    "client_key": "f60ac815",
+                    "kpn": "WECHAT_SMALL_APP",
+                    "kpf": "OUTSIDE_ANDROID_H5",
+                    "language": "zh_CN",
+                    "smallAppVersion": "v3.114.0",
+                    "session_key": "123063b798c706796d4e71a229547fee14dabdecb85d119c5daf99720d496e428f529d03bc9dc1a53a8fab307b723b98d4a41a129df1be81f6bd42648be78e2f1e3b51c82bb92220bd0a7fb0683326fe9e683d1dbc4477fa82ecf2d873e008d8292098cbf9b10ced28053001",
+                    "unionid": "V2:1230e9a914f904112e011801770f38cae60271569a62736a2ef09dd7ca29ae045e33e6f78bdefb82afeb667ed45d673aa4901a122bee54a2d0d74e96be5662a33544727a0aec2220a4eb707584694c0b041940f0508561ced8d6ff67cf9ec9b375e24c6cd059633a28053001",
+                    "eUserStableOpenId": "123064f8109ddf7d6c901e62f30735f32d4cd4a3612c91f64a97cac9d2e1cf78b4df71461899fded9683bcda048f882b74bd1a1230b7186912344668bc4685735a9da9d9724a2220bf6363e2c4cc19738f2206e8c8c64afc5b310866590ceeb44c41abd6bf4e1d1d28053001",
+                    "openId": "o5otV4_NGQt4gLzlk5CMDfTApN_g",
+                    "eOpenUserId": "124073d987dfc4dc5c544f8056bf54b83ff2d4a1042cbd3f974e088a05826320ff7b1655aacae3bad8629e09c3248978309c6642d9c1af5b8b65ef4ac4851bb8c9141a12bb92bd9c2290489ba7a733708a4a446de8382220dc87c9a1a5c3a0c359645e188828c8ae25ac716c35f90cb47df44362b760b8e928053001",
+                    "kuaishou.wechat.app_st": "ChZrdWFpc2hvdS53ZWNoYXQuYXBwLnN0EqAB6h_yylsMxiWgIgtBf1xjZYXDmsMvJ1B5J75XwcZ3X3CSJ6TCmJjZ8Mz1FFTQ7yEB3wrx3F40vuUtxKyn0Fsi4Jji2aTnKRHSQySNGSOiveeEWDyFnI9RqVxnaTrfMGF7jqBrqjk_ENl93vZTEL2wfYH1UlWIXiZoVNnBVcP_sXEpfr3dAZ3jUFXRG4XtVODrxpVOeF_gZTObCke6RoT-jBoSwdJwnczeQoOaE2Nj6iF-69fWIiCrxCOlzdzE2smcQ4sMv0VyLAHOTvQk9Y34pbuIsOjz7SgFMAE",
+                    "passToken": "ChNwYXNzcG9ydC5wYXNzLXRva2VuEpABDZ3HmbtEmdzkv2_Bg9ZVBF9DtJSeYLHFurm1wx-4NmeiF_l-AfaIuMtUa0LeVhYH4XqXYxksQgS0UYzqbKyk2UF-rWn9Z0x-fawxyWB6_-fdiPtwq2lJOTU4Dcd1jDhsxUkoxSCmIyhNHC-3TCDxvIsWwumW9gYpxL5rQOlxWRVilCQ9RomJWmirVumgkgLBGhKgKyzmNjRDxLfpDU5SPFhJmG0iIIfnORJeG0u1E8FQjuNKf7CP1hA372VOpLRiYaaJxCj-KAUwAQ",
+                    "userId": "1921947321"
+                }
+
+                if is_follow == "follow":
+                    ftype = 1
+                elif is_follow == "unfollow":
+                    ftype = 2
+                else:
+                    ftype = 1
+
+                json_text = {
+                    "touid": uid,
+                    "ftype": ftype,
+                    "page_ref": 84
+                }
+                urllib3.disable_warnings()
+                r = requests.post(url=url, headers=headers, cookies=cookies, params=params, json=json_text, verify=False)
+                if is_follow == "follow":
+                    if r.json()["result"] != 1:
+                        Common.logger(log_type).warning("{}", r.text)
+                    else:
+                        Common.logger(log_type).info("关注:{}, {}", nick, r)
+                else:
+                    if r.json()["result"] != 1:
+                        Common.logger(log_type).warning("{}", r.text)
+                    else:
+                        Common.logger(log_type).info("取消关注:{}, {}", nick, r)
+                time.sleep(1)
+        except Exception as e:
+            Common.logger(log_type).error("关注/取消关注异常:{}", e)
+
 
 if __name__ == "__main__":
     demo = Demo()
-    demo.time2()
-    # demo.download_video()
-    # demo.demo1()
-    # demo.time()
-    # demo.get_douyin_feeds()
-    # demo.demo2()
-    # demo.get_weishi_feeds()
-    # demo.edit_str()
-    # demo.sensitive_words()
-    # demo.logger().info("hello")
-    # time.sleep(10)
-    # a = "hahaha"
-    # demo.logger().exception("what:{}", a)
+    # demo.get_sheet()
+    demo.follow_unfollow("follow", "follow")
 
     # pass
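
The follow/unfollow call added above (and reused in main/follow_list.py) reduces to a single POST whose `ftype` field selects the action. A minimal sketch of that request shape, with the signed params and cookies elided since they are supplied from the WeChat-configuration sheet at runtime:

```python
# Sketch of the follow/unfollow request shape; credentials must be supplied by the caller.
import requests

def follow_unfollow(uid: str, action: str, cookies: dict, params: dict) -> bool:
    url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/relation/follow?"
    ftype = 2 if action == "unfollow" else 1  # 1 = follow, 2 = unfollow
    body = {"touid": uid, "ftype": ftype, "page_ref": 84}
    r = requests.post(url=url, cookies=cookies, params=params, json=body, verify=False)
    return r.json().get("result") == 1  # result == 1 means success
```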

+ 127 - 83
main/feishu_lib.py

@@ -2,7 +2,6 @@
 # @Author: wangkun
 # @Time: 2022/5/9
 import json
-import time
 
 import requests
 import urllib3
@@ -16,12 +15,37 @@ class Feishu:
     """
     编辑飞书云文档
     """
-    feishu_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
-    spreadsheetToken = "shtcnp4SaJt37q6OOOrYzPMjQkg"
+    # Kanyikan crawler data sheet
+    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
+    # Kuaishou crawler data sheet
+    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
+    # Weishi crawler data sheet
+    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
+    # Xiaoniangao crawler data sheet
+    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
+    # Data monitoring sheet
+    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
+
+    # Feishu spreadsheet token
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        """
+        :param crawler: which crawler
+        """
+        if crawler == "kanyikan":
+            return "shtcngRPoDYAi24x52j2nDuHMih"
+        elif crawler == "kuaishou":
+            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
+        elif crawler == "weishi":
+            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
+        elif crawler == "xiaoniangao":
+            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+        elif crawler == "monitor":
+            return "shtcnlZWYazInhf7Z60jkbLRJyd"
 
     # Get the Feishu API token
     @classmethod
-    def get_token(cls):
+    def get_token(cls, log_type):
         """
         Get the Feishu API token
         :return:
@@ -32,23 +56,24 @@ class Feishu:
 
         try:
             urllib3.disable_warnings()
-            time.sleep(1)
             response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
             tenant_access_token = response.json()["tenant_access_token"]
             return tenant_access_token
         except Exception as e:
-            Common.logger().error("获取飞书 api token 异常:{}", e)
+            Common.logger(log_type).error("获取飞书 api token 异常:{}", e)
 
     # Get spreadsheet metadata
     @classmethod
-    def get_metainfo(cls):
+    def get_metainfo(cls, log_type, crawler):
         """
         Get spreadsheet metadata
         :return:
         """
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/metainfo"
+        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                           + cls.spreadsheettoken(crawler) + "/metainfo"
+
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -57,24 +82,26 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
-            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
             response = json.loads(r.content.decode("utf8"))
             return response
         except Exception as e:
-            Common.logger().error("获取表格元数据异常:{}", e)
+            Common.logger(log_type).error("获取表格元数据异常:{}", e)
 
     # Read all data from a worksheet
     @classmethod
-    def get_values_batch(cls, sheetid):
+    def get_values_batch(cls, log_type, crawler, sheetid):
         """
         Read all data from a worksheet
+        :param log_type: which log to use
+        :param crawler: which crawler
         :param sheetid: which sheet
         :return: all data
         """
-
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/values_batch_get"
+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
@@ -85,7 +112,7 @@ class Feishu:
             # valueRenderOption=FormattedValue: compute and format the cell;
             # valueRenderOption=Formula: return the formula itself when the cell contains one;
             # valueRenderOption=UnformattedValue: compute but do not format the cell
-            "valueRenderOption": "FormattedValue",
+            "valueRenderOption": "ToString",
 
             # dateTimeRenderOption=FormattedString: compute and format dates/times according to their format (numbers are not formatted); returns the formatted string.
             "dateTimeRenderOption": "",
@@ -95,29 +122,30 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
-            time.sleep(0.5)
-            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
             response = json.loads(r.content.decode("utf8"))
             values = response["data"]["valueRanges"][0]["values"]
             return values
         except Exception as e:
-            Common.logger().error("读取工作表所有数据异常:{}", e)
+            Common.logger(log_type).error("读取工作表所有数据异常:{}", e)
 
     # Worksheet: insert rows or columns
     @classmethod
-    def insert_columns(cls, sheetid, majordimension, startindex, endindex):
+    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
         """
-        Worksheet: insert rows or columns
-        :param sheetid: which sheet
-        :param majordimension: rows or columns, default ROWS; options ROWS, COLUMNS
-        :param startindex: start position
-        :param endindex: end position
-        :return: inserts the first row
+        Insert rows or columns into a worksheet
+        :param log_type: log path
+        :param crawler: which crawler's cloud document
+        :param sheetid: which worksheet
+        :param majordimension: rows or columns; ROWS, COLUMNS
+        :param startindex: start position
+        :param endindex: end position
         """
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"\
-              + cls.spreadsheetToken + "/insert_dimension_range"
+        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -131,26 +159,26 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
-            time.sleep(0.5)
-            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("插入空行或列:{}", r.json()["msg"])
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("插入行或列:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("插入行或列异常:{}", e)
+            Common.logger(log_type).error("插入行或列异常:{}", e)
 
-    # Worksheet: write data
+    # Write data
     @classmethod
-    def update_values(cls, sheetid, ranges, values):
+    def update_values(cls, log_type, crawler, sheetid, ranges, values):
         """
         Write data
-        :param sheetid: which worksheet
-        :param ranges: cell range
-        :param values: values to update
-        :return:
+        :param log_type: log path
+        :param crawler: which crawler's cloud document
+        :param sheetid: which worksheet
+        :param ranges: cell range
+        :param values: the data to write, as a list
         """
-
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/values_batch_update"
+        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -161,33 +189,65 @@ class Feishu:
                 },
             ],
         }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("写入数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type).error("写入数据异常:{}", e)
+
+    # Merge cells
+    @classmethod
+    def merge_cells(cls, log_type, crawler, sheetid, ranges):
+        """
+        Merge cells
+        :param log_type: log path
+        :param crawler: which crawler
+        :param sheetid: which worksheet
+        :param ranges: the cell range to merge
+        """
+        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                          + cls.spreadsheettoken(crawler) + "/merge_cells"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+
+        body = {
+            "range": sheetid + "!" + ranges,
+            "mergeType": "MERGE_ROWS"
+        }
+
         try:
             urllib3.disable_warnings()
-            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("写入数据:{}", r.json()["msg"])
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("合并单元格:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("写入数据异常:{}", e)
+            Common.logger(log_type).error("合并单元格异常:{}", e)
 
     # Read cell data
     @classmethod
-    def get_range_value(cls, sheetid, cell):
+    def get_range_value(cls, log_type, crawler, sheetid, cell):
         """
         Read a cell's content
+        :param log_type: log path
+        :param crawler: which crawler
         :param sheetid: which worksheet
         :param cell: which cell
         :return: cell content
         """
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-              + cls.spreadsheetToken + "/values/" + sheetid + "!" + cell
+        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         params = {
             # valueRenderOption=ToString: return plain-text values (except numeric types);
             # valueRenderOption=FormattedValue: compute and format the cell;
-            # valueRenderOption=Formula: return the formula itself when the cell contains one;
-            # valueRenderOption=UnformattedValue: compute but do not format the cell.
+            # valueRenderOption=Formula: return the formula itself when the cell contains one;
+            # valueRenderOption=UnformattedValue: compute but do not format the cell.
             "valueRenderOption": "FormattedValue",
 
             # dateTimeRenderOption=FormattedString: compute and format dates/times according to their format (numbers are not formatted); returns the formatted string.
@@ -198,26 +258,29 @@ class Feishu:
         }
         try:
             urllib3.disable_warnings()
-            time.sleep(0.5)
-            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
             return r.json()["data"]["valueRange"]["values"][0]
         except Exception as e:
-            Common.logger().error("读取单元格数据异常:{}", e)
+            Common.logger(log_type).error("读取单元格数据异常:{}", e)
 
     # Delete rows or columns; options ROWS, COLUMNS
     @classmethod
-    def dimension_range(cls, sheetid, major_dimension, startindex, endindex):
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
         """
         Delete rows or columns
+        :param log_type: log path
+        :param crawler: which crawler
         :param sheetid: worksheet
         :param major_dimension: default ROWS; options ROWS, COLUMNS
         :param startindex: start position
         :param endindex: end position
         :return:
         """
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/dimension_range"
+        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/dimension_range"
         headers = {
-            "Authorization": "Bearer " + cls.get_token(),
+            "Authorization": "Bearer " + cls.get_token(log_type),
             "Content-Type": "application/json; charset=utf-8"
         }
         body = {
@@ -230,37 +293,18 @@ class Feishu:
             }
         try:
             urllib3.disable_warnings()
-            r = requests.delete(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            Common.logger().info("删除视频数据:{}", r.json()["msg"])
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("删除视频数据:{}", r.json()["msg"])
         except Exception as e:
-            Common.logger().error("删除视频数据异常:{}", e)
+            Common.logger(log_type).error("删除视频数据异常:{}", e)
 
 
 if __name__ == "__main__":
     feishu = Feishu()
 
-    # Get the Feishu API token
-    print(feishu.get_token())
-    # # Get spreadsheet metadata
-    # feishu.get_metainfo()
-
-    # Read all data from a worksheet
-    # print(feishu.get_values_batch("Zt2PGQ")[1][3])
-    # print(len(feishu.get_values_batch("SdCHOM")))
-    # for i in range(len(feishu.get_values_batch("Y8N3Vl"))):
-    #     videoid = feishu.get_values_batch("Y8N3Vl")[i][1]
-    #     if videoid == "b3":
-    #         # Delete rows or columns; options ROWS, COLUMNS
-    #         feishu.dimension_range("Y8N3Vl", "ROWS", i+1, i+1)
-    #         print(videoid)
-
-    # # Kanyikan worksheet: insert first row
-    # print(feishu.insert_columns("Y8N3Vl"))
-
-    # # Query cell content
-    # print(feishu.get_range_value("Y8N3Vl", "B8:C8"))
-    #
-    # # Delete rows or columns; options ROWS, COLUMNS
-    # feishu.dimension_range("Y8N3Vl", "ROWS")
+    # print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B4:B4")[0])
+    # print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"])
+    # print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B6:B6")[0])
+    # print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B7:B7")[0])
 
     pass
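
With the new `(log_type, crawler, sheetid)` signatures, every call names both its log file and its target spreadsheet. A sketch of the prepend-a-row pattern that follow_list.py uses against these wrappers (sheet id `MGMsHE` is the followed-users sheet from this commit; the row values are illustrative):

```python
# Typical prepend-a-row pattern under the new signatures; values are illustrative.
from main.feishu_lib import Feishu

rows = Feishu.get_values_batch("follow", "kuaishou", "MGMsHE")       # read every row
if "some_uid" not in [cell for row in rows for cell in row]:         # dedupe by uid
    Feishu.insert_columns("follow", "kuaishou", "MGMsHE", "ROWS", 1, 2)  # blank row at top
    Feishu.update_values("follow", "kuaishou", "MGMsHE", "A2:L2",
                         [["some_uid", "nick", "F", "bio", "", 0, "False"]])
```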

+ 676 - 0
main/follow_list.py

@@ -0,0 +1,676 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/7/7
+import os
+import random
+import sys
+import time
+import requests
+import urllib3
+
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.feishu_lib import Feishu
+from main.publish import Publish
+
+proxies = {"http": None, "https": None}
+
+
+class Follow:
+    # Mini-program: pagination cursor for the follow list
+    follow_pcursor = ""
+    # Mini-program: pagination cursor for a user's profile video list
+    person_pcursor = ""
+    # Video publish time
+    send_time = 0
+    # WeChat configuration
+    wechat_sheet = Feishu.get_values_batch("follow", "kuaishou", "f1R7Mx")
+    Referer = wechat_sheet[2][3]
+    NS_sig3 = wechat_sheet[3][3]
+    NS_sig3_origin = wechat_sheet[4][3]
+    did = wechat_sheet[5][3]
+    session_key = wechat_sheet[6][3]
+    unionid = wechat_sheet[7][3]
+    eUserStableOpenId = wechat_sheet[8][3]
+    openId = wechat_sheet[9][3]
+    eOpenUserId = wechat_sheet[10][3]
+    kuaishou_wechat_app_st = wechat_sheet[11][3]
+    passToken = wechat_sheet[12][3]
+    userId = wechat_sheet[13][3]
+
+    # Filter sensitive words
+    @classmethod
+    def sensitive_words(cls):
+        # Sensitive-word list
+        word_list = []
+        # Read all sensitive words from the cloud document and add them to the list
+        lists = Feishu.get_values_batch("follow", "kuaishou", "fn8IDi")
+        for i in lists:
+            for j in i:
+                # Skip empty cells
+                if j is None:
+                    pass
+                else:
+                    word_list.append(j)
+        return word_list
+
+    # Download rules
+    @staticmethod
+    def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
+        """
+        Basic rules for downloading a video
+        :param d_duration: duration
+        :param d_width: width
+        :param d_height: height
+        :param d_play_cnt: play count
+        :param d_like_cnt: like count
+        :param d_share_cnt: share count
+        :return: True if the rules are met; otherwise False
+        """
+        if int(float(d_duration)) >= 60:
+            if int(d_width) >= 720 or int(d_height) >= 720:
+                if int(d_play_cnt) >= 5000:
+                    if int(d_like_cnt) >= 10000 or int(d_share_cnt) >= 1000:
+                        return True
+                    else:
+                        return False
+                else:
+                    return False
+            else:
+                return False
+        else:
+            return False
+
+    # Sync the followed-users list from the mini-program to the cloud document
+    @classmethod
+    def get_sub_or_fans_list(cls, log_type):
+        try:
+            follow_list = []
+            follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "MGMsHE")
+            url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/relation/fol?"
+            headers = {
+                "content-type": "application/json",
+                "Accept-Encoding": "gzip,compress,br,deflate",
+                "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                              ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
+                              ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
+                "Referer": str(cls.Referer),
+            }
+            params = {
+                "__NS_sig3": str(cls.NS_sig3),
+                "__NS_sig3_origin": str(cls.NS_sig3_origin)
+            }
+            cookies = {
+                "did": str(cls.did),
+                "preMinaVersion": "v3.109.0",
+                "sid": "kuaishou.wechat.app",
+                "appId": "ks_wechat_small_app_2",
+                "clientid": "13",
+                "client_key": "f60ac815",
+                "kpn": "WECHAT_SMALL_APP",
+                "kpf": "OUTSIDE_ANDROID_H5",
+                "language": "zh_CN",
+                "smallAppVersion": "v3.114.0",
+                "session_key": str(cls.session_key),
+                "unionid": str(cls.unionid),
+                "eUserStableOpenId": str(cls.eUserStableOpenId),
+                "openId": str(cls.openId),
+                "eOpenUserId": str(cls.eOpenUserId),
+                "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
+                "passToken": str(cls.passToken),
+                "userId": str(cls.userId)
+            }
+            json_text = {
+                "count": 20,
+                "pcursor": str(cls.follow_pcursor),
+                "ftype": 1
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, params=params,
+                              cookies=cookies, json=json_text, proxies=proxies, verify=False)
+            if "fols" not in r.json():
+                Common.logger(log_type).warning("从小程序中获取关注用户列表:{}", r.text)
+            else:
+                users = r.json()["fols"]
+                for i in range(len(users)):
+                    uid = users[i]["targetId"]
+                    nick = users[i]["targetName"]
+                    sex = users[i]["targetSex"]
+                    description = users[i]["targetUserText"]
+                    if "followReason" in users[i]:
+                        follow_reason = users[i]["followReason"]
+                    else:
+                        follow_reason = ""
+                    follow_time = users[i]["time"]
+                    is_friend = users[i]["isFriend"]
+                    # print(f"uid:{uid}")
+                    follow_list.append(uid)
+                    # print(f"follow_list:{follow_list}")
+                    # Sync followed users to the cloud document
+                    if uid not in [j for i in follow_sheet for j in i]:
+                        time.sleep(1)
+                        Feishu.insert_columns(log_type, "kuaishou", "MGMsHE", "ROWS", 1, 2)
+                        time.sleep(1)
+                        values = [[uid, nick, sex, description, follow_reason, follow_time, str(is_friend)]]
+                        Feishu.update_values(log_type, "kuaishou", "MGMsHE", "A2:L2", values)
+                    else:
+                        Common.logger(log_type).info("用户:{},在云文档中已存在", nick)
+            cls.follow_pcursor = r.json()["pcursor"]
+            # Page through until the end
+            if cls.follow_pcursor != "no_more":
+                cls.get_sub_or_fans_list(log_type)
+            else:
+                Common.logger(log_type).info("从小程序中同步关注用户至云文档完成\n")
+        except Exception as e:
+            Common.logger(log_type).error("从小程序中,关注用户列表同步至云文档异常:{}", e)
+
+    # Get the followed-users list from the cloud document
+    @classmethod
+    def get_follow_users(cls, log_type):
+        try:
+            follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "MGMsHE")
+            if len(follow_sheet) == 1:
+                Common.logger(log_type).info("暂无关注用户")
+            else:
+                follow_list = []
+                nick_list = []
+                for i in range(1, len(follow_sheet)):
+                    uid = follow_sheet[i][0]
+                    nick = follow_sheet[i][1]
+                    nick_list.append(nick)
+                    follow_list.append(uid)
+                Common.logger(log_type).info("关注用户列表:{}", nick_list)
+                return follow_list
+        except Exception as e:
+            Common.logger(log_type).error("从云文档获取关注用户列表异常:{}", e)
+
+    # Get the unfollowed-users list from the cloud document
+    @classmethod
+    def get_unfollow_users(cls, log_type):
+        try:
+            unfollow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "Lms83Y")
+            if len(unfollow_sheet) == 1:
+                Common.logger(log_type).info("暂无取消关注用户")
+            else:
+                unfollow_list = []
+                nick_list = []
+                for i in range(1, len(unfollow_sheet)):
+                    uid = unfollow_sheet[i][0]
+                    nick = unfollow_sheet[i][1]
+                    nick_list.append(nick)
+                    unfollow_list.append(uid)
+                Common.logger(log_type).info("取消关注用户列表:{}", nick_list)
+                return unfollow_list
+        except Exception as e:
+            Common.logger(log_type).error("从云文档获取取消关注用户列表异常:{}", e)
+
+    # Mini-program: follow / unfollow a user
+    @classmethod
+    def follow_unfollow(cls, log_type, is_follow, uid):
+        try:
+            url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/relation/follow?"
+            headers = {
+                "content-type": "application/json",
+                "Accept-Encoding": "gzip,compress,br,deflate",
+                "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                              ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
+                              ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
+                "Referer": str(cls.Referer),
+            }
+            params = {
+                "__NS_sig3": str(cls.NS_sig3),
+                "__NS_sig3_origin": str(cls.NS_sig3_origin)
+            }
+            cookies = {
+                "did": str(cls.did),
+                "preMinaVersion": "v3.109.0",
+                "sid": "kuaishou.wechat.app",
+                "appId": "ks_wechat_small_app_2",
+                "clientid": "13",
+                "client_key": "f60ac815",
+                "kpn": "WECHAT_SMALL_APP",
+                "kpf": "OUTSIDE_ANDROID_H5",
+                "language": "zh_CN",
+                "smallAppVersion": "v3.114.0",
+                "session_key": str(cls.session_key),
+                "unionid": str(cls.unionid),
+                "eUserStableOpenId": str(cls.eUserStableOpenId),
+                "openId": str(cls.openId),
+                "eOpenUserId": str(cls.eOpenUserId),
+                "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
+                "passToken": str(cls.passToken),
+                "userId": str(cls.userId)
+            }
+
+            if is_follow == "follow":
+                ftype = 1
+            elif is_follow == "unfollow":
+                ftype = 2
+            else:
+                ftype = 1
+
+            json_text = {
+                "touid": uid,
+                "ftype": ftype,
+                "page_ref": 84
+            }
+            r = requests.post(url=url, headers=headers, cookies=cookies, params=params, json=json_text)
+            if is_follow == "follow":
+                if r.json()["result"] != 1:
+                    Common.logger(log_type).warning("{}", r.text)
+                else:
+                    Common.logger(log_type).info("关注:{}, {}", uid, r)
+            else:
+                if r.json()["result"] != 1:
+                    Common.logger(log_type).warning("{}", r.text)
+                else:
+                    Common.logger(log_type).info("取消关注:{}, {}", uid, r)
+        except Exception as e:
+            Common.logger(log_type).error("关注/取消关注异常:{}", e)
+
+    # Get videos from a user's profile page
+    @classmethod
+    def get_videos_from_person(cls, log_type, uid):
+        try:
+            time.sleep(1)
+            url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/profile?"
+            headers = {
+                "content-type": "application/json",
+                "Accept-Encoding": "gzip,compress,br,deflate",
+                "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                              ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
+                              ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
+                "Referer": str(cls.Referer),
+            }
+            params = {
+                "__NS_sig3": str(cls.NS_sig3),
+                "__NS_sig3_origin": str(cls.NS_sig3_origin)
+            }
+            cookies = {
+                "did": str(cls.did),
+                "preMinaVersion": "v3.109.0",
+                "sid": "kuaishou.wechat.app",
+                "appId": "ks_wechat_small_app_2",
+                "clientid": "13",
+                "client_key": "f60ac815",
+                "kpn": "WECHAT_SMALL_APP",
+                "kpf": "OUTSIDE_ANDROID_H5",
+                "language": "zh_CN",
+                "smallAppVersion": "v3.114.0",
+                "session_key": str(cls.session_key),
+                "unionid": str(cls.unionid),
+                "eUserStableOpenId": str(cls.eUserStableOpenId),
+                "openId": str(cls.openId),
+                "eOpenUserId": str(cls.eOpenUserId),
+                "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
+                "passToken": str(cls.passToken),
+                "userId": str(cls.userId)
+            }
+            json_text = {
+                "count": 12,
+                "pcursor": str(cls.person_pcursor),
+                "eid": uid
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, params=params, cookies=cookies,
+                              json=json_text, proxies=proxies, verify=False)
+            # Common.logger(log_type).info("response:{}", r.text)
+            if "feeds" not in r.json():
+                Common.logger(log_type).warning("response:{}", r.text)
+            feeds = r.json()["feeds"]
+            if len(feeds) == 0:
+                Common.logger(log_type).warning("用户主页无视频\n")
+            for i in range(len(feeds)):
+                # Strip hashtags from the video title and handle special characters
+                kuaishou_title = feeds[i]["caption"]
+                title_split1 = kuaishou_title.split(" #")
+                if title_split1[0] != "":
+                    title1 = title_split1[0]
+                else:
+                    title1 = title_split1[-1]
+
+                title_split2 = title1.split(" #")
+                if title_split2[0] != "":
+                    title2 = title_split2[0]
+                else:
+                    title2 = title_split2[-1]
+
+                title_split3 = title2.split("@")
+                if title_split3[0] != "":
+                    title3 = title_split3[0]
+                else:
+                    title3 = title_split3[-1]
+
+                video_title = title3.strip().replace("\n", "") \
+                    .replace("/", "").replace("快手", "").replace(" ", "") \
+                    .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
+                    .replace("#", "").replace(".", "。").replace("\\", "") \
+                    .replace(":", "").replace("*", "").replace("?", "") \
+                    .replace("?", "").replace('"', "").replace("<", "") \
+                    .replace(">", "").replace("|", "").replace("@", "")
+
+                if "photoId" not in feeds[i]:
+                    video_id = "0"
+                else:
+                    video_id = feeds[i]["photoId"]
+
+                if "viewCount" not in feeds[i]:
+                    video_play_cnt = "0"
+                else:
+                    video_play_cnt = feeds[i]["viewCount"]
+
+                if "likeCount" not in feeds[i]:
+                    video_like_cnt = "0"
+                else:
+                    video_like_cnt = feeds[i]["likeCount"]
+
+                if "shareCount" not in feeds[i]:
+                    video_share_cnt = "0"
+                else:
+                    video_share_cnt = feeds[i]["shareCount"]
+
+                if "commentCount" not in feeds[i]:
+                    video_comment_cnt = "0"
+                else:
+                    video_comment_cnt = feeds[i]["commentCount"]
+
+                if "duration" not in feeds[i]:
+                    video_duration = "0"
+                else:
+                    video_duration = int(int(feeds[i]["duration"]) / 1000)
+
+                if "width" not in feeds[i] or "height" not in feeds[i]:
+                    video_width = "0"
+                    video_height = "0"
+                else:
+                    video_width = feeds[i]["width"]
+                    video_height = feeds[i]["height"]
+
+                if "timestamp" not in feeds[i]:
+                    video_send_time = "0"
+                else:
+                    video_send_time = feeds[i]["timestamp"]
+                cls.send_time = int(int(video_send_time) / 1000)
+
+                if "userName" not in feeds[i]:
+                    user_name = "0"
+                else:
+                    user_name = feeds[i]["userName"].strip().replace("\n", "") \
+                        .replace("/", "").replace("快手", "").replace(" ", "") \
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+
+                if "userId" not in feeds[i]:
+                    user_id = "0"
+                else:
+                    user_id = feeds[i]["userId"]
+
+                if "headUrl" not in feeds[i]:
+                    head_url = "0"
+                else:
+                    head_url = feeds[i]["headUrl"]
+
+                if "webpCoverUrls" in feeds[i]:
+                    cover_url = feeds[i]["webpCoverUrls"][-1]["url"]
+                elif "coverUrls" not in feeds[i]:
+                    cover_url = "0"
+                elif len(feeds[i]["coverUrls"]) == 0:
+                    cover_url = "0"
+                else:
+                    cover_url = feeds[i]["coverUrls"][0]["url"]
+
+                if "mainMvUrls" not in feeds[i]:
+                    video_url = "0"
+                elif len(feeds[i]["mainMvUrls"]) == 0:
+                    video_url = "0"
+                else:
+                    video_url = feeds[i]["mainMvUrls"][0]["url"]
+
+                Common.logger(log_type).info("video_title:{}".format(video_title))
+                Common.logger(log_type).info("user_name:{}".format(user_name))
+                Common.logger(log_type).info("video_id:{}".format(video_id))
+                Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
+                Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
+                Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
+                # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
+                Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
+                # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
+                Common.logger(log_type).info("video_send_time:{}".format(
+                    time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
+                # Common.logger(log_type).info("user_id:{}".format(user_id))
+                # Common.logger(log_type).info("head_url:{}".format(head_url))
+                # Common.logger(log_type).info("cover_url:{}".format(cover_url))
+                Common.logger(log_type).info("video_url:{}".format(video_url))
+
+                # Filter out invalid videos
+                if video_id == "0" \
+                        or head_url == "0" \
+                        or cover_url == "0" \
+                        or video_url == "0" \
+                        or video_duration == "0" \
+                        or video_send_time == "0" \
+                        or user_name == "0" \
+                        or user_id == "0" \
+                        or video_title == "":
+                    Common.logger(log_type).info("无效视频\n")
+                # Publish time must be within 7 days
+                elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
+                    Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
+                        "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
+                    cls.person_pcursor = ""
+                    break
+                # Check for sensitive words
+                elif any(word in kuaishou_title for word in cls.sensitive_words()):
+                    Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
+                # Deduplicate against the cloud document: https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
+                elif video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3b207c") for j in m]:
+                    Common.logger(log_type).info("该视频已下载:{}\n", video_title)
+                # Deduplicate against the cloud document: https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=bnj9op
+                elif video_id in [j for n in Feishu.get_values_batch(log_type, "kuaishou", "bnj9op") for j in n]:
+                    Common.logger(log_type).info("该视频已在feeds中:{}\n", video_title)
+                else:
+                    Feishu.insert_columns("follow", "kuaishou", "bnj9op", "ROWS", 1, 2)
+                    # Get the current time
+                    get_feeds_time = int(time.time())
+                    # Write the data into the worksheet
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
+                               "用户主页",
+                               video_id,
+                               video_title,
+                               video_play_cnt,
+                               video_comment_cnt,
+                               video_like_cnt,
+                               video_share_cnt,
+                               video_duration,
+                               str(video_width) + "*" + str(video_height),
+                               time.strftime(
+                                   "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
+                               user_name,
+                               user_id,
+                               head_url,
+                               cover_url,
+                               video_url]]
+                    # Wait 1s so the cloud document isn't hit too frequently, which causes errors
+                    time.sleep(1)
+                    Feishu.update_values("follow", "kuaishou", "bnj9op", "A2:P2", values)
+                    Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
+
+                # Pagination
+                cls.person_pcursor = r.json()["pcursor"]
+                # Publish time within 7 days
+                if int(time.time()) - int(cls.send_time) <= 604800:
+                    cls.get_videos_from_person(log_type, uid)
+                else:
+                    cls.person_pcursor = ""
+                    return
+
+        except Exception as e:
+            Common.logger(log_type).error("获取个人主页视频异常:{}\n", e)
+
+    # Get videos from every user on the follow list
+    @classmethod
+    def get_videos_from_follow(cls, log_type):
+        try:
+            # uids of the followed-users list
+            uid_sheet = Feishu.get_values_batch(log_type, "kuaishou", "MGMsHE")
+            for i in range(1, len(uid_sheet)):
+                uid = uid_sheet[i][0]
+                nick = uid_sheet[i][1]
+                Common.logger(log_type).info("获取用户:{}主页视频\n", nick)
+                cls.get_videos_from_person(log_type, uid)
+                time.sleep(random.randint(3, 5))
+        except Exception as e:
+            Common.logger(log_type).error("获取用户主页视频异常:{}", e)
+
+    # Download / upload
+    @classmethod
+    def download_publish(cls, log_type, env):
+        try:
+            follow_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "bnj9op")
+            for i in range(1, len(follow_feeds_sheet)):
+                time.sleep(1)
+                download_video_id = follow_feeds_sheet[i][2]
+                download_video_title = follow_feeds_sheet[i][3]
+                download_video_play_cnt = follow_feeds_sheet[i][4]
+                download_video_comment_cnt = follow_feeds_sheet[i][5]
+                download_video_like_cnt = follow_feeds_sheet[i][6]
+                download_video_share_cnt = follow_feeds_sheet[i][7]
+                download_video_duration = follow_feeds_sheet[i][8]
+                download_video_resolution = follow_feeds_sheet[i][9]
+                download_video_send_time = follow_feeds_sheet[i][10]
+                download_user_name = follow_feeds_sheet[i][11]
+                download_user_id = follow_feeds_sheet[i][12]
+                download_head_url = follow_feeds_sheet[i][13]
+                download_cover_url = follow_feeds_sheet[i][14]
+                download_video_url = follow_feeds_sheet[i][15]
+
+                Common.logger(log_type).info("正在判断第{}行,视频:{}", i + 1, download_video_title)
+
+                # Common.logger(log_type).info("download_video_id:{}", download_video_id)
+                # Common.logger(log_type).info("download_video_title:{}", download_video_title)
+                # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
+                # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
+                # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
+                # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
+                # Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
+                # Common.logger(log_type).info("download_video_resolution:{}", download_video_resolution)
+                # Common.logger(log_type).info("download_video_width:{}", download_video_width)
+                # Common.logger(log_type).info("download_video_height:{}", download_video_height)
+                # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
+                # Common.logger(log_type).info("download_user_name:{}", download_user_name)
+                # Common.logger(log_type).info("download_user_id:{}", download_user_id)
+                # Common.logger(log_type).info("download_head_url:{}", download_head_url)
+                # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
+                # Common.logger(log_type).info("download_video_url:{}", download_video_url)
+
+                # Skip empty rows and videos with an empty title
+                if download_video_id is None \
+                        or download_video_id == "" \
+                        or download_video_title is None \
+                        or download_video_title == "":
+                    # Delete the row (dimension can be ROWS or COLUMNS)
+                    Feishu.dimension_range(log_type, "kuaishou", "bnj9op", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).warning("Empty title or empty row, deleted\n")
+                    return
+                # Download rule
+                elif cls.download_rule(
+                        download_video_duration, download_video_resolution.split("*")[0],
+                        download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
+                        download_video_share_cnt) is False:
+                    # Delete the row (dimension can be ROWS or COLUMNS)
+                    Feishu.dimension_range(log_type, "kuaishou", "bnj9op", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("Does not meet the download rule: {}, deleted\n", download_video_title)
+                    return
+                # Deduplicate against the downloaded-videos sheet
+                elif download_video_id in [j for m in Feishu.get_values_batch(
+                        log_type, "kuaishou", "3b207c") for j in m]:
+                    # Delete the row (dimension can be ROWS or COLUMNS)
+                    Feishu.dimension_range(log_type, "kuaishou", "bnj9op", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("Video already downloaded: {}, deleted\n", download_video_title)
+                    return
+                else:
+                    # Download the cover image
+                    Common.download_method(log_type=log_type, text="cover",
+                                           d_name=str(download_video_title), d_url=str(download_cover_url))
+                    # Download the video
+                    Common.download_method(log_type=log_type, text="video",
+                                           d_name=str(download_video_title), d_url=str(download_video_url))
+                    # Save the video info to "./videos/{download_video_title}/info.txt"
+                    with open("./videos/" + download_video_title + "/" + "info.txt",
+                              "a", encoding="UTF-8") as f_a:
+                        f_a.write(str(download_video_id) + "\n" +
+                                  str(download_video_title) + "\n" +
+                                  str(download_video_duration) + "\n" +
+                                  str(download_video_play_cnt) + "\n" +
+                                  str(download_video_comment_cnt) + "\n" +
+                                  str(download_video_like_cnt) + "\n" +
+                                  str(download_video_share_cnt) + "\n" +
+                                  str(download_video_resolution) + "\n" +
+                                  str(int(time.mktime(
+                                      time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
+                                  str(download_user_name) + "\n" +
+                                  str(download_head_url) + "\n" +
+                                  str(download_video_url) + "\n" +
+                                  str(download_cover_url) + "\n" +
+                                  "kuaishou_person")
+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
+
+                    # 上传视频
+                    Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
+                    our_video_id = Publish.upload_and_publish(log_type, env, "play")
+                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
+                    Common.logger(log_type).info("视频上传完成:{}", download_video_title)
+
+                    # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
+                    Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
+                    # 视频ID工作表,插入首行
+                    Feishu.insert_columns(log_type, "kuaishou", "3b207c", "ROWS", 1, 2)
+                    # 视频ID工作表,首行写入数据
+                    upload_time = int(time.time())
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                               "用户主页",
+                               str(download_video_id),
+                               str(download_video_title),
+                               our_video_link,
+                               download_video_play_cnt,
+                               download_video_comment_cnt,
+                               download_video_like_cnt,
+                               download_video_share_cnt,
+                               download_video_duration,
+                               str(download_video_resolution),
+                               str(download_video_send_time),
+                               str(download_user_name),
+                               str(download_user_id),
+                               str(download_head_url),
+                               str(download_cover_url),
+                               str(download_video_url)]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, "kuaishou", "3b207c", "F2:V2", values)
+
+                    # Delete the row (dimension can be ROWS or COLUMNS)
+                    Feishu.dimension_range(log_type, "kuaishou", "bnj9op", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("Video: {} downloaded/uploaded successfully\n", download_video_title)
+                    return
+        except Exception as e:
+            Common.logger(log_type).error("Download/upload exception: {}", e)
+
+    # Run download/upload until the feeds sheet is empty
+    @classmethod
+    def run_download_publish(cls, log_type, env):
+        try:
+            while True:
+                follow_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "bnj9op")
+                if len(follow_feeds_sheet) == 1:
+                    Common.logger(log_type).info("下载/上传完成")
+                    break
+                else:
+                    cls.download_publish(log_type, env)
+        except Exception as e:
+            Common.logger(log_type).error("执行下载/上传异常:{}", e)
+
+
+if __name__ == "__main__":
+    follow = Follow()
+    # follow.get_videos_from_follow("follow")
+    # follow.follow_unfollow("follow", "follow", "687090964")
+    # follow.get_videos_from_person("follow", "1294566048")
+    follow.download_publish("follow", "dev")

+ 50 - 39
main/publish.py

@@ -9,18 +9,16 @@ import json
 import os
 import random
 import time
-
 import oss2
 import requests
 import urllib3
 from main.common import Common
-
 proxies = {"http": None, "https": None}
 
 
 class Publish:
     @classmethod
-    def publish_video_dev(cls, request_data):
+    def publish_video_dev(cls, log_type, request_data):
         """
         loginUid  站内uid (随机)
         appType  默认:888888
@@ -38,14 +36,17 @@ class Publish:
         """
         # Common.logger().info('publish request data: {}'.format(request_data))
         result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
-        # Common.logger().info('publish result: {}'.format(result))
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
         if result['code'] != 0:
-            Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
+            Common.logger(log_type).error('publish failure msg = {}'.format(result['msg']))
         else:
-            Common.logger().info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+            Common.logger(log_type).info('publish success video_id = {}'.format(request_data['crawlerSrcId']))
+        return video_id
 
     @classmethod
-    def publish_video_prod(cls, request_data):
+    def publish_video_prod(cls, log_type, request_data):
         """
         loginUid  站内uid (随机)
         appType  默认:888888
@@ -62,11 +63,14 @@ class Publish:
         :return:
         """
         result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
-        # Common.logger().info('publish result: {}'.format(result))
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
         if result['code'] != 0:
-            Common.logger().error('pushlish failure msg = {}'.format(result['msg']))
+            Common.logger(log_type).error('pushlish failure msg = {}'.format(result['msg']))
         else:
-            Common.logger().info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+            Common.logger(log_type).info('publish success video_id = {}'.format(request_data['crawlerSrcId']))
+        return video_id
 
     @classmethod
     def request_post(cls, request_url, request_data):
@@ -114,25 +118,25 @@ class Publish:
     - 读取 基本信息 调用发布接口
     """
     # env 日期20220225 文件名
-    oss_file_path_video = r'longvideo/crawler_local/video/{}/{}/{}'
-    oss_file_path_image = r'longvideo/crawler_local/image/{}/{}/{}'
+    oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}'
+    oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
 
     @classmethod
-    def put_file(cls, oss_file, local_file):
+    def put_file(cls, log_type, oss_file, local_file):
         cls.bucket.put_object_from_file(oss_file, local_file)
-        Common.logger().info("put oss file = {}, local file = {} success".format(oss_file, local_file))
+        Common.logger(log_type).info("put oss file = {}, local file = {} success".format(oss_file, local_file))
 
     # 清除本地文件
     @classmethod
-    def remove_local_file(cls, local_file):
+    def remove_local_file(cls, log_type, local_file):
         os.remove(local_file)
-        Common.logger().info("remove local file = {} success".format(local_file))
+        Common.logger(log_type).info("remove local file = {} success".format(local_file))
 
     # 清除本地文件夹
     @classmethod
-    def remove_local_file_dir(cls, local_file):
+    def remove_local_file_dir(cls, log_type, local_file):
         os.rmdir(local_file)
-        Common.logger().info("remove local file dir = {} success".format(local_file))
+        Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
 
     local_file_path = './videos'
     video_file = 'video'
@@ -146,13 +150,14 @@ class Publish:
                       20631233, 20631234, 20631235, 20631236, 20631237]
 
     @classmethod
-    def upload_and_publish(cls, env, job):
+    def upload_and_publish(cls, log_type, env, job):
         """
         上传视频到 oss
+        :param log_type: which log file to write to
         :param env: 测试环境:dev,正式环境:prod
         :param job: 上升榜:up,播放量:play
         """
-        Common.logger().info("upload_and_publish starting...")
+        Common.logger(log_type).info("upload_and_publish starting...")
         today = time.strftime("%Y%m%d", time.localtime())
         # videos 目录下的所有视频文件夹
         files = os.listdir(cls.local_file_path)
@@ -162,10 +167,13 @@ class Publish:
                 fi_d = os.path.join(cls.local_file_path, f)
                 # 确认为视频文件夹
                 if os.path.isdir(fi_d):
-                    Common.logger().info('dir = {}'.format(fi_d))
+                    # Common.logger(log_type).info('dir = {}'.format(fi_d))
                     # 列出所有视频文件夹
                     dir_files = os.listdir(fi_d)
-                    data = {'appType': '888888', 'crawlerSrcCode': 'KUAISHOU_XCX', 'viewStatus': '1', 'versionCode': '1'}
+                    data = {'appType': '888888',
+                            'crawlerSrcCode': 'KUAISHOU_XCX',
+                            'viewStatus': '1',
+                            'versionCode': '1'}
                     now_timestamp = int(round(time.time() * 1000))
                     data['crawlerTaskTimestamp'] = str(now_timestamp)
                     global uid
@@ -182,7 +190,7 @@ class Publish:
                     for fi in dir_files:
                         # 视频文件夹下的所有文件路径
                         fi_path = fi_d + '/' + fi
-                        Common.logger().info('dir fi_path = {}'.format(fi_path))
+                        # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # 读取 info.txt,赋值给 data
                         if cls.info_file in fi:
                             f = open(fi_path, "r", encoding="UTF-8")
@@ -191,7 +199,7 @@ class Publish:
                                 line = f.readline()
                                 line = line.replace('\n', '')
                                 if line is not None and len(line) != 0 and not line.isspace():
-                                    Common.logger().info("line = {}".format(line))
+                                    # Common.logger(log_type).info("line = {}".format(line))
                                     if i == 0:
                                         data['crawlerSrcId'] = line
                                     elif i == 1:
@@ -201,15 +209,15 @@ class Publish:
                                     elif i == 8:
                                         data['crawlerSrcPublishTimestamp'] = line
                                 else:
-                                    Common.logger().warning("{} line is None".format(fi_path))
+                                    Common.logger(log_type).warning("{} line is None".format(fi_path))
                             f.close()
                             # remove info.txt
-                            cls.remove_local_file(fi_path)
+                            cls.remove_local_file(log_type, fi_path)
                     # 刷新数据
                     dir_files = os.listdir(fi_d)
                     for fi in dir_files:
                         fi_path = fi_d + '/' + fi
-                        Common.logger().info('dir fi_path = {}'.format(fi_path))
+                        # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
                         # 上传oss
                         if cls.video_file in fi:
                             global oss_video_file
@@ -217,31 +225,34 @@ class Publish:
                                 oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
                             elif env == "prod":
                                 oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
-                            Common.logger().info("oss_video_file = {}".format(oss_video_file))
-                            cls.put_file(oss_video_file, fi_path)
+                            # Common.logger(log_type).info("oss_video_file = {}".format(oss_video_file))
+                            cls.put_file(log_type, oss_video_file, fi_path)
                             data['videoPath'] = oss_video_file
-                            Common.logger().info("videoPath = {}".format(oss_video_file))
+                            # Common.logger(log_type).info("videoPath = {}".format(oss_video_file))
                         elif cls.image_file in fi:
                             global oss_image_file
                             if env == "dev":
                                 oss_image_file = cls.oss_file_path_image.format("dev", today, data['crawlerSrcId'])
                             elif env == "prod":
                                 oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
-                            Common.logger().info("oss_image_file = {}".format(oss_image_file))
-                            cls.put_file(oss_image_file, fi_path)
+                            # Common.logger(log_type).info("oss_image_file = {}".format(oss_image_file))
+                            cls.put_file(log_type, oss_image_file, fi_path)
                             data['coverImgPath'] = oss_image_file
-                            Common.logger().info("coverImgPath = {}".format(oss_image_file))
+                            # Common.logger(log_type).info("coverImgPath = {}".format(oss_image_file))
                         # 全部remove
-                        cls.remove_local_file(fi_path)
+                        cls.remove_local_file(log_type, fi_path)
 
                     # 发布
                     if env == "dev":
-                        cls.publish_video_dev(data)
+                        video_id = cls.publish_video_dev(log_type, data)
                     elif env == "prod":
-                        cls.publish_video_prod(data)
-                    cls.remove_local_file_dir(fi_d)
+                        video_id = cls.publish_video_prod(log_type, data)
+                    else:
+                        video_id = cls.publish_video_dev(log_type, data)
+                    cls.remove_local_file_dir(log_type, fi_d)
+                    return video_id
 
                 else:
-                    Common.logger().error('file not a dir = {}'.format(fi_d))
+                    Common.logger(log_type).warning('file not a dir = {}'.format(fi_d))
             except Exception as e:
-                Common.logger().exception('upload_and_publish error', e)
+                Common.logger(log_type).error('upload_and_publish error: {}', e)
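
The publish step above couples tightly to the line order of info.txt: it reads the file positionally (line 0 is crawlerSrcId, line 1 the title, line 8 the publish timestamp, per the hunks above). Here is a small sketch of reading that positional file back into a dict; the field list is inferred from the info.txt writer in follow_list.py above, so treat the names as assumptions rather than a canonical schema:

# Field order inferred from the info.txt writer in follow_list.py (assumption)
INFO_FIELDS = ["video_id", "title", "duration", "play_cnt", "comment_cnt",
               "like_cnt", "share_cnt", "resolution", "send_timestamp",
               "user_name", "head_url", "video_url", "cover_url", "source"]

def read_info_txt(path):
    """Parse a positional info.txt into a dict, skipping blank lines."""
    with open(path, "r", encoding="UTF-8") as f:
        lines = [line.strip() for line in f if line.strip()]
    return dict(zip(INFO_FIELDS, lines))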

+ 123 - 136
main/download_kuaishou.py → main/recommend_list.py

@@ -10,6 +10,7 @@ import sys
 import time
 import requests
 import urllib3
+
 sys.path.append(os.getcwd())
 from main.common import Common
 from main.feishu_lib import Feishu
@@ -19,28 +20,27 @@ proxies = {"http": None, "https": None}
 
 
 class KuaiShou:
-    # 已下载视频列表
-    download_video_list = []
     # 配置微信号
-    Referer = Feishu.get_range_value("f1R7Mx", "C3:C3")[0][0]["link"]
-    NS_sig3 = Feishu.get_range_value("f1R7Mx", "C4:C4")[0]
-    NS_sig3_origin = Feishu.get_range_value("f1R7Mx", "C5:C5")[0]
-    did = Feishu.get_range_value("f1R7Mx", "C6:C6")[0]
-    session_key = Feishu.get_range_value("f1R7Mx", "C7:C7")[0]
-    unionid = Feishu.get_range_value("f1R7Mx", "C8:C8")[0]
-    eUserStableOpenId = Feishu.get_range_value("f1R7Mx", "C9:C9")[0]
-    openId = Feishu.get_range_value("f1R7Mx", "C10:C10")[0]
-    eOpenUserId = Feishu.get_range_value("f1R7Mx", "C11:C11")[0]
-    kuaishou_wechat_app_st = Feishu.get_range_value("f1R7Mx", "C12:C12")[0]
-    passToken = Feishu.get_range_value("f1R7Mx", "C13:C13")[0]
-    userId = Feishu.get_range_value("f1R7Mx", "C14:C14")[0]
+    wechat_sheet = Feishu.get_values_batch("recommend", "kuaishou", "f1R7Mx")
+    Referer = wechat_sheet[2][2]
+    NS_sig3 = wechat_sheet[3][2]
+    NS_sig3_origin = wechat_sheet[4][2]
+    did = wechat_sheet[5][2]
+    session_key = wechat_sheet[6][2]
+    unionid = wechat_sheet[7][2]
+    eUserStableOpenId = wechat_sheet[8][2]
+    openId = wechat_sheet[9][2]
+    eOpenUserId = wechat_sheet[10][2]
+    kuaishou_wechat_app_st = wechat_sheet[11][2]
+    passToken = wechat_sheet[12][2]
+    userId = wechat_sheet[13][2]
 
     @classmethod
     def sensitive_words(cls):
         # 敏感词库列表
         word_list = []
         # 从云文档读取所有敏感词,添加到词库列表
-        lists = Feishu.get_values_batch("fn8IDi")
+        lists = Feishu.get_values_batch("recommend", "kuaishou", "fn8IDi")
         for i in lists:
             for j in i:
                 # 过滤空的单元格内容
@@ -51,8 +51,7 @@ class KuaiShou:
         return word_list
 
     @staticmethod
-    def kuaishou_download_rule(d_duration, d_width, d_height,
-                               d_play_cnt, d_like_cnt, d_share_cnt):
+    def kuaishou_download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
         """
         下载视频的基本规则
         :param d_duration: 时长
@@ -79,7 +78,7 @@ class KuaiShou:
         return False
 
     @classmethod
-    def get_feeds(cls):
+    def get_feeds(cls, log_type):
         """
         1.从快手小程序首页推荐,获取视频列表
         2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c 中去重
@@ -131,7 +130,6 @@ class KuaiShou:
             "sourceFrom": 2,
             "thirdPartyUserId": int(cls.userId)
         }
-
         try:
             urllib3.disable_warnings()
             r = requests.post(url=url, headers=headers, params=params,
@@ -166,100 +164,90 @@ class KuaiShou:
                     .replace(":", "").replace("*", "").replace("?", "") \
                     .replace("?", "").replace('"', "").replace("<", "") \
                     .replace(">", "").replace("|", "")
-                Common.logger().info("video_title:{}".format(video_title))
 
                 if "photoId" not in feeds[i]:
-                    photo_id = "0"
-                    Common.logger().info("photo_id:{}".format(photo_id))
+                    video_id = "0"
                 else:
-                    photo_id = feeds[i]["photoId"]
-                    Common.logger().info("photo_id:{}".format(photo_id))
+                    video_id = feeds[i]["photoId"]
 
                 if "viewCount" not in feeds[i]:
                     video_play_cnt = "0"
-                    Common.logger().info("video_play_cnt:0")
                 else:
                     video_play_cnt = feeds[i]["viewCount"]
-                    Common.logger().info("video_play_cnt:{}".format(video_play_cnt))
 
                 if "likeCount" not in feeds[i]:
                     video_like_cnt = "0"
-                    Common.logger().info("video_like_cnt:0")
                 else:
                     video_like_cnt = feeds[i]["likeCount"]
-                    Common.logger().info("video_like_cnt:{}".format(video_like_cnt))
 
                 if "shareCount" not in feeds[i]:
                     video_share_cnt = "0"
-                    Common.logger().info("video_share_cnt:0")
                 else:
                     video_share_cnt = feeds[i]["shareCount"]
-                    Common.logger().info("video_share_cnt:{}".format(video_share_cnt))
 
                 if "commentCount" not in feeds[i]:
                     video_comment_cnt = "0"
-                    Common.logger().info("video_comment_cnt:0")
                 else:
                     video_comment_cnt = feeds[i]["commentCount"]
-                    Common.logger().info("video_comment_cnt:{}".format(video_comment_cnt))
 
                 if "duration" not in feeds[i]:
                     video_duration = "0"
-                    Common.logger().info("video_duration:不存在")
                 else:
                     video_duration = int(int(feeds[i]["duration"]) / 1000)
-                    Common.logger().info("video_duration:{}秒".format(video_duration))
 
                 if "width" not in feeds[i] or "height" not in feeds[i]:
                     video_width = "0"
                     video_height = "0"
                     video_resolution = str(video_width) + "*" + str(video_height)
-                    Common.logger().info("无分辨率")
                 else:
                     video_width = feeds[i]["width"]
                     video_height = feeds[i]["height"]
                     video_resolution = str(video_width) + "*" + str(video_height)
-                    Common.logger().info("video_resolution:{}".format(video_resolution))
 
                 if "timestamp" not in feeds[i]:
                     video_send_time = "0"
-                    Common.logger().info("video_send_time:不存在")
                 else:
                     video_send_time = feeds[i]["timestamp"]
-                    Common.logger().info("video_send_time:{}".format(
-                        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
 
                 user_name = feeds[i]["userName"].strip().replace("\n", "") \
                     .replace("/", "").replace("快手", "").replace(" ", "") \
                     .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                Common.logger().info("user_name:{}".format(user_name))
 
                 user_id = feeds[i]["userId"]
-                Common.logger().info("user_id:{}".format(user_id))
 
                 if "headUrl" not in feeds[i]:
                     head_url = "0"
-                    Common.logger().info("head_url:不存在")
                 else:
                     head_url = feeds[i]["headUrl"]
-                    Common.logger().info("head_url:{}".format(head_url))
 
                 if len(feeds[i]["coverUrls"]) == 0:
                     cover_url = "0"
-                    Common.logger().info("cover_url:不存在")
                 else:
                     cover_url = feeds[i]["coverUrls"][0]["url"]
-                    Common.logger().info("cover_url:{}".format(cover_url))
 
                 if len(feeds[i]["mainMvUrls"]) == 0:
                     video_url = "0"
-                    Common.logger().info("video_url:不存在")
                 else:
                     video_url = feeds[i]["mainMvUrls"][0]["url"]
-                    Common.logger().info("video_url:{}".format(video_url))
+
+                Common.logger(log_type).info("video_title:{}".format(video_title))
+                Common.logger(log_type).info("user_name:{}".format(user_name))
+                Common.logger(log_type).info("video_id:{}".format(video_id))
+                Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
+                Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
+                Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
+                # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
+                Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
+                # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
+                Common.logger(log_type).info("video_send_time:{}".format(
+                    time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
+                # Common.logger(log_type).info("user_id:{}".format(user_id))
+                # Common.logger(log_type).info("head_url:{}".format(head_url))
+                # Common.logger(log_type).info("cover_url:{}".format(cover_url))
+                Common.logger(log_type).info("video_url:{}".format(video_url))
 
                 # 过滤无效视频
-                if photo_id == "0" \
+                if video_id == "0" \
                         or head_url == "0" \
                         or cover_url == "0" \
                         or video_url == "0" \
@@ -267,28 +255,27 @@ class KuaiShou:
                         or video_send_time == "0" \
                         or user_name == "" \
                         or video_title == "":
-                    Common.logger().info("无效视频")
+                    Common.logger(log_type).info("无效视频\n")
                 # 判断敏感词
                 elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
-                    Common.logger().info("视频已中敏感词:{}".format(kuaishou_title))
+                    Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
                 # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
-                elif photo_id in [j for m in Feishu.get_values_batch("3b207c") for j in m]:
-                    Common.logger().info("该视频已下载:{}", video_title)
+                elif video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3b207c") for j in m]:
+                    Common.logger(log_type).info("该视频已下载:{}\n", video_title)
                 # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ
-                elif photo_id in [j for n in Feishu.get_values_batch("Zt2PGQ") for j in n]:
-                    Common.logger().info("该视频已在feeds中:{}", video_title)
+                elif video_id in [j for n in Feishu.get_values_batch(log_type, "kuaishou", "Zt2PGQ") for j in n]:
+                    Common.logger(log_type).info("该视频已在feeds中:{}\n", video_title)
                 else:
-                    Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
                     # feeds工作表,插入首行
                     time.sleep(1)
-                    Feishu.insert_columns("Zt2PGQ", "ROWS", 1, 2)
+                    Feishu.insert_columns(log_type, "kuaishou", "Zt2PGQ", "ROWS", 1, 2)
 
                     # 获取当前时间
                     get_feeds_time = int(time.time())
                     # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
                     values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
                                "推荐榜",
-                               photo_id,
+                               video_id,
                                video_title,
                                video_play_cnt,
                                video_comment_cnt,
@@ -304,12 +291,13 @@ class KuaiShou:
                                video_url]]
                     # 等待 1s,防止操作云文档太频繁,导致报错
                     time.sleep(1)
-                    Feishu.update_values("Zt2PGQ", "A2:P2", values)
+                    Feishu.update_values(log_type, "kuaishou", "Zt2PGQ", "A2:P2", values)
+                    Common.logger(log_type).info("视频:{}添加至feeds成功\n".format(video_title))
         except Exception as e:
-            Common.logger().error("获取视频 list 异常:{}".format(e))
+            Common.logger(log_type).error("获取视频 list 异常:{}\n".format(e))
 
     @classmethod
-    def download_publish(cls, env):
+    def download_publish(cls, log_type, env):
         """
         1.从 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=Zt2PGQ 中读取视频信息
         2.下载并上传符合规则的视频
@@ -317,68 +305,69 @@ class KuaiShou:
         正式环境:env == prod
         """
         try:
-            for i in range(1, len(Feishu.get_values_batch("Zt2PGQ")) + 1):
-                time.sleep(1)
-                download_photo_id = Feishu.get_values_batch("Zt2PGQ")[i][2]
-                download_video_title = Feishu.get_values_batch("Zt2PGQ")[i][3]
-                download_video_play_cnt = Feishu.get_values_batch("Zt2PGQ")[i][4]
-                download_video_comment_cnt = Feishu.get_values_batch("Zt2PGQ")[i][5]
-                download_video_like_cnt = Feishu.get_values_batch("Zt2PGQ")[i][6]
-                download_video_share_cnt = Feishu.get_values_batch("Zt2PGQ")[i][7]
-                download_video_duration = Feishu.get_values_batch("Zt2PGQ")[i][8]
-                download_video_resolution = Feishu.get_values_batch("Zt2PGQ")[i][9]
-                download_video_width = download_video_resolution.split("*")[0]
-                download_video_height = download_video_resolution.split("*")[-1]
-                download_video_send_time = Feishu.get_values_batch("Zt2PGQ")[i][10]
-                download_user_name = Feishu.get_values_batch("Zt2PGQ")[i][11]
-                download_user_id = Feishu.get_values_batch("Zt2PGQ")[i][12]
-                download_head_url = Feishu.get_values_batch("Zt2PGQ")[i][13][0]["link"]
-                download_cover_url = Feishu.get_values_batch("Zt2PGQ")[i][14][0]["link"]
-                download_video_url = Feishu.get_values_batch("Zt2PGQ")[i][15][0]["link"]
-
-                # Common.logger().info("download_photo_id:{}", download_photo_id)
-                # Common.logger().info("download_video_title:{}", download_video_title)
-                # Common.logger().info("download_video_play_cnt:{}", download_video_play_cnt)
-                # Common.logger().info("download_video_comment_cnt:{}", download_video_comment_cnt)
-                # Common.logger().info("download_video_like_cnt:{}", download_video_like_cnt)
-                # Common.logger().info("download_video_share_cnt:{}", download_video_share_cnt)
-                # Common.logger().info("download_video_duration:{}", download_video_duration)
-                # Common.logger().info("download_video_resolution:{}", download_video_resolution)
-                # Common.logger().info("download_video_send_time:{}", download_video_send_time)
-                # Common.logger().info("download_user_name:{}", download_user_name)
-                # Common.logger().info("download_user_id:{}", download_user_id)
-                # Common.logger().info("download_head_url:{}", download_head_url)
-                # Common.logger().info("download_cover_url:{}", download_cover_url)
-                # Common.logger().info("download_video_url:{}", download_video_url)
-
-                Common.logger().info("正在判断第{}行,视频:{}", i, download_video_title)
+            recommend_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "Zt2PGQ")
+            for i in range(1, len(recommend_feeds_sheet)):
+                download_video_id = recommend_feeds_sheet[i][2]
+                download_video_title = recommend_feeds_sheet[i][3]
+                download_video_play_cnt = recommend_feeds_sheet[i][4]
+                download_video_comment_cnt = recommend_feeds_sheet[i][5]
+                download_video_like_cnt = recommend_feeds_sheet[i][6]
+                download_video_share_cnt = recommend_feeds_sheet[i][7]
+                download_video_duration = recommend_feeds_sheet[i][8]
+                download_video_resolution = recommend_feeds_sheet[i][9]
+                download_video_send_time = recommend_feeds_sheet[i][10]
+                download_user_name = recommend_feeds_sheet[i][11]
+                download_user_id = recommend_feeds_sheet[i][12]
+                download_head_url = recommend_feeds_sheet[i][13]
+                download_cover_url = recommend_feeds_sheet[i][14]
+                download_video_url = recommend_feeds_sheet[i][15]
+
+                Common.logger(log_type).info("正在判断第{}行,视频:{}", i + 1, download_video_title)
+
+                # Common.logger(log_type).info("download_video_id:{}", download_video_id)
+                # Common.logger(log_type).info("download_video_title:{}", download_video_title)
+                # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
+                # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
+                # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
+                # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
+                # Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
+                # Common.logger(log_type).info("download_video_resolution:{}", download_video_resolution)
+                # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
+                # Common.logger(log_type).info("download_user_name:{}", download_user_name)
+                # Common.logger(log_type).info("download_user_id:{}", download_user_id)
+                # Common.logger(log_type).info("download_head_url:{}", download_head_url)
+                # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
+                # Common.logger(log_type).info("download_video_url:{}", download_video_url)
 
                 # 过滤空行
-                if download_photo_id is None or download_video_title is None or download_video_play_cnt is None:
-                    Common.logger().warning("空行,略过")
+                if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
+                    # Delete the row (dimension can be ROWS or COLUMNS)
+                    Feishu.dimension_range(log_type, "kuaishou", "Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("Empty row, deleted\n")
+                    return
                 # 去重
-                elif download_photo_id in [j for m in Feishu.get_values_batch("3b207c") for j in m]:
-                    Common.logger().info("该视频已下载:{}", download_video_title)
+                elif download_video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3b207c") for j in
+                                           m]:
                     # 删除行或列,可选 ROWS、COLUMNS
-                    time.sleep(1)
-                    Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Feishu.dimension_range(log_type, "kuaishou", "Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("该视频已下载:{},删除成功\n", download_video_title)
                     return
-                # 下载规则
+                # Download rule
                 elif cls.kuaishou_download_rule(
-                        download_video_duration, download_video_width, download_video_height,
+                        download_video_duration, download_video_resolution.split("*")[0],
+                        download_video_resolution.split("*")[-1],
                         download_video_play_cnt, download_video_like_cnt, download_video_share_cnt) is True:
-                    Common.logger().info("开始下载快手视频:{}".format(download_video_title))
 
                     # 下载封面
-                    Common.download_method(text="cover",
+                    Common.download_method(log_type=log_type, text="cover",
                                            d_name=str(download_video_title), d_url=str(download_cover_url))
                     # 下载视频
-                    Common.download_method(text="video",
+                    Common.download_method(log_type=log_type, text="video",
                                            d_name=str(download_video_title), d_url=str(download_video_url))
                     # 保存视频信息至 "./videos/{download_video_title}/info.txt"
                     with open("./videos/" + download_video_title
                               + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
-                        f_a.write(str(download_photo_id) + "\n" +
+                        f_a.write(str(download_video_id) + "\n" +
                                   str(download_video_title) + "\n" +
                                   str(download_video_duration) + "\n" +
                                   str(download_video_play_cnt) + "\n" +
@@ -393,30 +382,30 @@ class KuaiShou:
                                   str(download_video_url) + "\n" +
                                   str(download_cover_url) + "\n" +
                                   str(cls.did))
-                    Common.logger().info("==========视频信息已保存至info.txt==========")
-
-                    # 添加视频 ID 到 list,用于统计当次下载总数
-                    cls.download_video_list.append(download_photo_id)
+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
 
                     # 上传视频
-                    Common.logger().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish(env, "play")
+                    Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
+                    our_video_id = Publish.upload_and_publish(log_type, env, "play")
+                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
+                    Common.logger(log_type).info("视频上传完成:{}", download_video_title)
 
                     # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3b207c
-                    Common.logger().info("保存视频ID至云文档:{}", download_video_title)
+                    Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
                     # 视频ID工作表,插入首行
-                    Feishu.insert_columns("3b207c", "ROWS", 1, 2)
+                    Feishu.insert_columns(log_type, "kuaishou", "3b207c", "ROWS", 1, 2)
                     # 视频ID工作表,首行写入数据
                     upload_time = int(time.time())
                     values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
                                "推荐榜",
-                               str(download_photo_id),
+                               str(download_video_id),
                                str(download_video_title),
-                               str(download_video_play_cnt),
-                               str(download_video_comment_cnt),
-                               str(download_video_like_cnt),
-                               str(download_video_share_cnt),
-                               str(download_video_duration),
+                               our_video_link,
+                               download_video_play_cnt,
+                               download_video_comment_cnt,
+                               download_video_like_cnt,
+                               download_video_share_cnt,
+                               download_video_duration,
                                str(download_video_resolution),
                                str(download_video_send_time),
                                str(download_user_name),
@@ -425,40 +414,38 @@ class KuaiShou:
                                str(download_cover_url),
                                str(download_video_url)]]
                     time.sleep(1)
-                    Feishu.update_values("3b207c", "A2:Q2", values)
+                    Feishu.update_values(log_type, "kuaishou", "3b207c", "F2:V2", values)
 
-                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Zt2PGQ
-                    Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
                     # 删除行或列,可选 ROWS、COLUMNS
                     time.sleep(1)
-                    Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Feishu.dimension_range(log_type, "kuaishou", "Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("从云文档删除该视频信息成功:{}\n", download_video_title)
                     return
                 else:
-                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Zt2PGQ
-                    Common.logger().info("该视频不满足下载规则,删除在云文档中的信息:{}", download_video_title)
                     # 删除行或列,可选 ROWS、COLUMNS
-                    Feishu.dimension_range("Zt2PGQ", "ROWS", i + 1, i + 1)
+                    Feishu.dimension_range(log_type, "kuaishou", "Zt2PGQ", "ROWS", i + 1, i + 1)
+                    # Remove the video info from the Feishu doc: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Zt2PGQ
+                    Common.logger(log_type).info("Video does not meet the download rule, removed from the Feishu doc: {}\n", download_video_title)
                     return
         except Exception as e:
-            Common.logger().error("视频 info 异常,删除该视频信息", e)
+            Common.logger(log_type).error("视频 info 异常,删除该视频信息\n", e)
             # 删除行或列,可选 ROWS、COLUMNS
-            Feishu.dimension_range("Zt2PGQ", "ROWS", 2, 2)
+            Feishu.dimension_range(log_type, "kuaishou", "Zt2PGQ", "ROWS", 2, 2)
             return
 
     # 执行上传及下载
     @classmethod
-    def run_download_publish(cls):
+    def run_download_publish(cls, log_type, env):
         try:
             while True:
-                if len(Feishu.get_values_batch("Zt2PGQ")) == 1:
+                if len(Feishu.get_values_batch(log_type, "kuaishou", "Zt2PGQ")) == 1:
                     break
                 else:
-                    cls.download_publish("prod")
+                    cls.download_publish(log_type, env)
         except Exception as e:
-            Common.logger().error(e)
+            Common.logger(log_type).error("执行下载/上传异常:{}", e)
 
 
 if __name__ == "__main__":
     kuaishou = KuaiShou()
-    kuaishou.get_feeds()
-    kuaishou.download_publish("dev")
+    print(kuaishou.sensitive_words())
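
One performance note on the dedup checks above: each check re-fetches the whole "3b207c" sheet and flattens it into a list, and list membership tests are linear. Below is a sketch of fetching once per run and testing against a set instead, assuming the same Feishu.get_values_batch helper imported from main.feishu_lib:

from main.feishu_lib import Feishu

def build_downloaded_ids(log_type):
    """Flatten the downloaded-videos sheet into a set once per run, so each
    dedup test is O(1) and costs no extra API call."""
    rows = Feishu.get_values_batch(log_type, "kuaishou", "3b207c")
    return {cell for row in rows for cell in row if cell}

# Sketch of use inside get_feeds / download_publish:
#     downloaded_ids = build_downloaded_ids(log_type)
#     if video_id in downloaded_ids:
#         ...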

+ 33 - 0
main/run_follow.py

@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/7/11
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.follow_list import Follow
+
+
+class Main:
+    @classmethod
+    def main(cls):
+        while True:
+            Common.logger("follow").info("开始抓取个人主页视频\n")
+            # 获取所有关注列表的用户视频
+            Follow.get_videos_from_follow("follow")
+            # 下载/上传
+            Common.logger("follow").info("开始下载/上传视频")
+            Follow.run_download_publish("follow", "dev")
+            # 清除日志
+            Common.del_logs("follow")
+            Common.logger("follow").info("休眠1小时\n")
+            Follow.follow_pcursor = ""
+            Follow.person_pcursor = ""
+            Follow.send_time = 0
+            time.sleep(3600)
+
+
+if __name__ == "__main__":
+    main = Main()
+    main.main()
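
Since run_follow.py is meant to run unattended, any exception that escapes the Follow calls would end the whole hourly loop. A guarded-cycle sketch under that assumption (cycle stands in for the body of Main.main above; this is not code from the repo):

import time
import traceback

def run_cycle_forever(cycle, interval=3600):
    """Run one crawl cycle, log a crash instead of dying, then sleep."""
    while True:
        try:
            cycle()
        except Exception:
            # Keep the scheduler alive; the stack trace goes to stderr
            traceback.print_exc()
        time.sleep(interval)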

+ 14 - 34
main/run_recommend.py

@@ -9,46 +9,26 @@ import sys
 import time
 sys.path.append(os.getcwd())
 from main.common import Common
-from main.download_kuaishou import KuaiShou
+from main.recommend_list import KuaiShou
 
 
-def kuaishou_prod_job():
+def main():
     """
     Run the Kuaishou recommend-list crawler
     """
     while True:
-        prod_time = datetime.datetime.now()
-        # 当天下载及上传的视频数:150 条
-        if prod_time.hour > 22 or prod_time.hour < 8:
-            Common.logger().info("已下载视频数:{}".format(len(KuaiShou.download_video_list)))
-            time.sleep(60)
-            # 删除冗余日志
-            Common.del_logs()
-            KuaiShou.download_video_list = []
-            break
-        else:
-            # 抓取符合规则的视频,写入 kuaishou_feeds.txt
-            KuaiShou.get_feeds()
-            # 下载视频,并上传
-            KuaiShou.run_download_publish()
-            # 随机睡眠1-3s
-            time.sleep(random.randint(1, 3))
-
-
-def main_prod():
-    """
-    正式环境主函数
-    """
-    while True:
-        Common.logger().info("开始抓取快手视频")
-        time.sleep(1)
-        while True:
-            main_time = datetime.datetime.now()
-            if 8 <= main_time.hour <= 22:
-                kuaishou_prod_job()
-            else:
-                break
+        Common.logger("recommend").info("开始抓取快手推荐榜\n")
+        # 抓取符合规则的视频,写入 kuaishou_feeds.txt
+        KuaiShou.get_feeds("recommend")
+        # 下载视频,并上传
+        Common.logger("recommend").info("开始下载/上传视频\n")
+        KuaiShou.run_download_publish("recommend", "dev")
+        # 随机睡眠1-3s
+        Common.logger("recommend").info("随机休眠 10 - 20 秒\n")
+        time.sleep(random.randint(10, 20))
+        # 删除冗余日志
+        Common.del_logs("recommend")
 
 
 if __name__ == "__main__":
-    main_prod()
+    main()
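
The rewrite above drops the old 08:00-22:00 scheduling window. If that window is still wanted (the 抓取规则.txt removed at the end of this commit kept it as a rule), a minimal guard at the top of main()'s loop could look like this sketch:

import datetime
import time

def within_crawl_window(start_hour=8, end_hour=22):
    """True between 08:00 and 22:00 local time, matching the old rule."""
    now = datetime.datetime.now()
    return start_hour <= now.hour < end_hour

# Sketch of use inside main()'s while-loop:
#     if not within_crawl_window():
#         time.sleep(600)  # re-check in 10 minutes
#         continue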

+ 0 - 4
requirements.txt

@@ -1,4 +0,0 @@
-loguru==0.6.0
-oss2==2.15.0
-requests==2.27.1
-urllib3==1.26.9

+ 0 - 37
抓取规则.txt

@@ -1,37 +0,0 @@
-==========2022/6/8===========
-一、按照关注账号进行抓取
-1、任务开始时间:
-- 每天早上8点-晚上22点
-2、抓取规则:
-  - 视频播放量点赞量5万+ ,分享量2000+
-  - 视频时长1分钟以上,10分钟以下
-  - 视频分辨率720以上
-  - 站内标题=快手视频原标题 (需要过滤掉标题中的话题#  #和@)
-  - 站内封面图=快手视频原封面图
-3、站内承接:
-- 每日入库100条视频(优先爬取最新达到标准的视频)
-- 视频随机分配到10个虚拟账号。uid列表:快手爬虫账号
-4、特别注意:
-- 视频需要排重,已经抓取过得视频,不要重复抓取
-- 需要对视频库进行持续扫描:如1条视频上周未达到5万+点赞,本周达到了5万点赞,则进行抓取。
-
-
-==========2022/4/15===========
-一、按照数据指标抓取
-1、任务开始时间:
-- 每天早上8点-晚上22点
-2、抓取规则:
-  - 视频播放量点赞量5万+ ,分享量2000+
-  - 视频时长1分钟以上,10分钟以下
-  - 视频分辨率720以上
-  - 站内标题=快手视频原标题 (需要过滤掉标题中的话题#  #)
-  - 站内封面图=快手视频原封面图
-3、站内承接:
-- 每日入库200条视频
-- 视频随机分配到10个虚拟账号。
-4、特别注意:
-- 视频需要排重,已经抓取过得视频,不要重复抓取
-- 需要对视频库进行持续扫描:如1条视频上周未达到5万+点赞,本周达到了5万点赞,则进行抓取。
-5、新增爬虫视频标题过滤词
--  集结吧光合创作者、电影解说、快来露两手、分享家常美食教程、光合作者助手、创作者中心、创作者学院、娱乐星熠计划、解说电影、电影剪辑、放映室、老剧、影视剪辑、精彩片段、冬日影娱大作战、春日追剧计划单、影视解说、中视频影视混剪计划、众志成城共抗疫情、我在追好剧、娱乐星灿计划、电影、电视剧、毛泽东、毛主席、周恩来、林彪、习近平、习大大、彭丽媛、怀旧经典影视
-==============================
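
For reference, the thresholds in the file deleted above map onto the kuaishou_download_rule predicate in recommend_list.py roughly as follows; the real function body is not shown in this diff, so the exact comparisons here are assumptions:

def download_rule_sketch(duration, width, height, play_cnt, like_cnt, share_cnt):
    """Assumed reading of the deleted rules: 1-10 minutes long, at least 720
    on the short side, 50k+ plays and likes, 2000+ shares."""
    return (60 <= int(duration) <= 600
            and min(int(width), int(height)) >= 720
            and int(play_cnt) >= 50000
            and int(like_cnt) >= 50000
            and int(share_cnt) >= 2000)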