wangkun 3 роки тому
батько
коміт
57cb750ba4

Різницю між файлами не показано, бо вона завелика
+ 0 - 0
chlsfiles/charles202205071008.chlsj


Різницю між файлами не показано, бо вона завелика
+ 0 - 0
chlsfiles/charles202205071009.chlsj


+ 4 - 4
main/common.py

@@ -84,7 +84,7 @@ class Common:
         下载封面:text == "cover" ; 下载视频:text == "video"
         需要下载的视频标题:d_title
         视频封面,或视频播放地址:d_url
-        下载保存路径:"./files/{d_title}/"
+        下载保存路径:"./videos/{d_title}/"
         """
         # 首先创建一个保存该视频相关信息的文件夹
         video_dir = "./videos/" + d_name + "/"
@@ -107,7 +107,7 @@ class Common:
                         f.write(chunk)
                 cls.crawler_log().info("==========视频下载完成==========")
             except Exception as e:
-                cls.crawler_log().info("视频下载失败:{}".format(e))
+                cls.crawler_log().error("视频下载失败:{}".format(e))
 
         # 下载封面
         elif text == "cover":
@@ -124,7 +124,7 @@ class Common:
                     f.write(response.content)
                 cls.crawler_log().info("==========封面下载完成==========")
             except Exception as e:
-                cls.crawler_log().info("封面下载失败:{}".format(e))
+                cls.crawler_log().error("封面下载失败:{}".format(e))
 
     @staticmethod
     def read_txt(t_name):
@@ -212,7 +212,7 @@ class Common:
                     time.sleep(10)
                     cls.get_session()
             except Exception as e:
-                cls.crawler_log().info("获取 session 异常,30s后重试:{}".format(e))
+                cls.crawler_log().error("获取 session 异常,30s后重试:{}".format(e))
                 time.sleep(30)
                 cls.get_session()
 

+ 77 - 84
main/download_play.py

@@ -12,6 +12,7 @@ sys.path.append(os.getcwd())
 from main.common import Common
 from main.get_feeds import get_feeds
 from main.publish import Publish
+from main.feishu_lib import Feishu
 
 
 class DownloadPlay:
@@ -41,90 +42,82 @@ class DownloadPlay:
         测试环境:env == dev
         正式环境:env == prod
         """
-        videos = Common.read_txt("kanyikan_feeds.txt")
-        for video in videos:
-            download_video_id = video.strip().split(" + ")[1]
-            try:
-                # download_video_id = video.strip().split(" + ")[1]
-                download_video_title = video.strip().split(" + ")[3]
-                download_video_duration = video.strip().split(" + ")[4]
-                download_video_play_cnt = video.strip().split(" + ")[2]
-                download_video_comment_cnt = video.strip().split(" + ")[5]
-                download_video_like_cnt = video.strip().split(" + ")[6]
-                download_video_share_cnt = video.strip().split(" + ")[7]
-                download_video_resolution = video.strip().split(" + ")[8]
-                download_video_width = download_video_resolution.split("*")[0]
-                download_video_height = download_video_resolution.split("*")[-1]
-                download_video_send_time = video.strip().split(" + ")[9]
-                download_user_name = video.strip().split(" + ")[10]
-                download_head_url = video.strip().split(" + ")[11]
-                download_cover_url = video.strip().split(" + ")[12]
-                download_video_url = video.strip().split(" + ")[13]
-                download_video_session = video.strip().split(" + ")[-1]
-
-                if cls.play_rule(download_video_width,
-                                 download_video_height,
-                                 download_video_duration,
-                                 download_video_play_cnt) is True:
-                    Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
-                    # 下载封面
-                    Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
-                    # 下载视频
-                    Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
-
-                    # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                    with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
-                        f_a.write(download_video_id + "\n")
-
-                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                    with open(r"./videos/" + download_video_title + r"/info.txt", "a", encoding="utf8") as f_a:
-                        f_a.write(str(download_video_id) + "\n" +
-                                  str(download_video_title) + "\n" +
-                                  str(download_video_duration) + "\n" +
-                                  str(download_video_play_cnt) + "\n" +
-                                  str(download_video_comment_cnt) + "\n" +
-                                  str(download_video_like_cnt) + "\n" +
-                                  str(download_video_share_cnt) + "\n" +
-                                  str(download_video_resolution) + "\n" +
-                                  str(download_video_send_time) + "\n" +
-                                  str(download_user_name) + "\n" +
-                                  str(download_head_url) + "\n" +
-                                  str(download_video_url) + "\n" +
-                                  str(download_cover_url) + "\n" +
-                                  str(download_video_session))
-
-                    # 上传视频
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish(env, "play")
-
-                    # 删除该视频在kanyikan_feeds.txt中的信息
-                    Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
-                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                        lines = f_r.readlines()
-                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                        for line in lines:
-                            if download_video_id in line.split(" + ")[1]:
-                                continue
-                            f_w.write(line)
-                else:
-                    # 删除该视频在 recommend.txt中的信息
-                    Common.crawler_log().info("该视频不满足下载规则,删除在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
-                    with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                        lines = f_r.readlines()
-                    with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                        for line in lines:
-                            if download_video_id in line.split(" + ")[1]:
-                                continue
-                            f_w.write(line)
-            except Exception as e:
-                Common.crawler_log().info("视频 info 异常".format(e))
-                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_video_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
+        if len(Feishu.get_values_batch("Y8N3Vl")) == 1:
+            pass
+        else:
+            for i in range(len(Feishu.get_values_batch("Y8N3Vl"))):
+                try:
+                    download_video_id = Feishu.get_values_batch("Y8N3Vl")[i+1][1]
+                    download_video_play_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][2]
+                    download_video_title = Feishu.get_values_batch("Y8N3Vl")[i+1][3]
+                    download_video_duration = Feishu.get_values_batch("Y8N3Vl")[i+1][4]
+                    download_video_comment_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][5]
+                    download_video_like_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][6]
+                    download_video_share_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][7]
+                    download_video_resolution = Feishu.get_values_batch("Y8N3Vl")[i+1][8]
+                    download_video_width = download_video_resolution.split("*")[0]
+                    download_video_height = download_video_resolution.split("*")[-1]
+                    download_video_send_time = Feishu.get_values_batch("Y8N3Vl")[i+1][9]
+                    download_user_name = Feishu.get_values_batch("Y8N3Vl")[i+1][10]
+                    download_head_url = Feishu.get_values_batch("Y8N3Vl")[i+1][11]
+                    download_cover_url = Feishu.get_values_batch("Y8N3Vl")[i+1][12]
+                    download_video_url = Feishu.get_values_batch("Y8N3Vl")[i+1][13]
+                    download_video_session = Feishu.get_values_batch("Y8N3Vl")[i+1][14]
+
+                    if cls.play_rule(download_video_width, download_video_height,
+                                     download_video_duration, download_video_play_cnt) is True:
+                        Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
+
+                        # 下载封面
+                        Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                        # 下载视频
+                        Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+                        # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                        with open(r"./videos/" + download_video_title + r"/info.txt", "a", encoding="utf8") as f_a:
+                            f_a.write(str(download_video_id) + "\n" +
+                                      str(download_video_title) + "\n" +
+                                      str(download_video_duration) + "\n" +
+                                      str(download_video_play_cnt) + "\n" +
+                                      str(download_video_comment_cnt) + "\n" +
+                                      str(download_video_like_cnt) + "\n" +
+                                      str(download_video_share_cnt) + "\n" +
+                                      str(download_video_resolution) + "\n" +
+                                      str(download_video_send_time) + "\n" +
+                                      str(download_user_name) + "\n" +
+                                      str(download_head_url) + "\n" +
+                                      str(download_video_url) + "\n" +
+                                      str(download_cover_url) + "\n" +
+                                      str(download_video_session))
+
+                        # 上传视频
+                        Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                        Publish.upload_and_publish(env, "play")
+
+                        # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                        Common.crawler_log().info("保存视频ID至云文档:{}".format(download_video_title))
+                        # 看一看+ ,视频ID工作表,插入首行
+                        Feishu.insert_columns("20ce0c")
+                        # 看一看+ ,视频ID工作表,首行写入数据
+                        Feishu.update_values("20ce0c", download_video_id, "", "", "",
+                                             "", "", "", "", "", "", "", "", "", "", "")
+
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                        Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+                    else:
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                        Common.crawler_log().info("该视频不满足下载规则,删除在云文档中的信息:{}".format(download_video_title))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+                except Exception as e:
+                    Common.crawler_log().error("视频 info 异常,删除该视频信息".format(e))
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+            cls.download_play_video("prod")
 
 
 if __name__ == "__main__":

+ 134 - 144
main/download_sendtime.py

@@ -14,10 +14,12 @@ import sys
 import time
 import requests
 import urllib3
+
 sys.path.append(os.getcwd())
 from main.common import Common
 from main.get_feeds import get_feeds
 from main.publish import Publish
+from main.feishu_lib import Feishu
 
 proxies = {"http": None, "https": None}
 
@@ -50,154 +52,142 @@ class DownloadSendtime:
         :param env: 测试环境:dev;正式环境:prod
         :return: 下载并上传视频
         """
-        get_sendtime_session = Common.get_session()
-        Common.crawler_log().info("获取视频info时,session:{}".format(get_sendtime_session))
-        lines = Common.read_txt("kanyikan_feeds.txt")
-        for line in lines:
-            v_id = line.strip().split(" + ")[1]  # 视频外网 ID
-            # v_send_date = line.strip().split(" + ")[9]  # 发布时间
-            url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-            param = {
-                "session": get_sendtime_session,
-                "vid": v_id,
-                "wxaVersion": "3.9.2",
-                "channelid": "208201",
-                "scene": "32",
-                "subscene": "1089",
-                "model": "iPhone 11<iPhone12,1>14.7.1",
-                "clientVersion": "8.0.18",
-                "sharesearchid": "447665862521758270",
-                "sharesource": "-1"
-            }
-            try:
-                urllib3.disable_warnings()
-                r = requests.get(url=url, params=param, proxies=proxies, verify=False)
-                response = json.loads(r.content.decode("utf8"))
-                if "data" not in response:
-                    Common.crawler_log().info("获取视频info时,session过期,等待 30 秒")
-                    # 如果返回空信息,则随机睡眠 30-35 秒
-                    time.sleep(random.randint(31, 35))
-                else:
-                    data = response["data"]
-                    v_title = data["title"]
-                    v_duration = data["duration"]
-                    v_play_cnt_sendtime = data["played_cnt"]
-                    v_comment_cnt = data["comment_cnt"]
-                    v_liked_cnt = data["liked_cnt"]
-                    v_shared_cnt = data["shared_cnt"]
-                    v_width = data["width"]
-                    v_height = data["height"]
-                    v_resolution = str(v_width) + "*" + str(v_height)
-                    v_send_date = data["upload_time"]
-                    v_username = data["user_info"]["nickname"]
-                    v_user_cover = data["user_info"]["headimg_url"]
-                    v_video_cover = data["cover_url"]
-                    if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 2:
-                            download_url_up = data["play_info"][2]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
-                        else:
-                            download_url_up = data["play_info"][0]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
+        if len(Feishu.get_values_batch("Y8N3Vl")) == 1:
+            pass
+        else:
+            for i in range(len(Feishu.get_values_batch("Y8N3Vl"))):
+                try:
+                    sendtime_session = Common.get_session()
+                    Common.crawler_log().info("获取视频info时,session:{}".format(sendtime_session))
+                    download_video_id = Feishu.get_values_batch("Y8N3Vl")[i+1][1]
+                    download_video_title = Feishu.get_values_batch("Y8N3Vl")[i+1][3]
+                    url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
+                    param = {
+                        "session": sendtime_session,
+                        "vid": download_video_id,
+                        "wxaVersion": "3.9.2",
+                        "channelid": "208201",
+                        "scene": "32",
+                        "subscene": "1089",
+                        "model": "iPhone 11<iPhone12,1>14.7.1",
+                        "clientVersion": "8.0.18",
+                        "sharesearchid": "447665862521758270",
+                        "sharesource": "-1"
+                    }
+                    urllib3.disable_warnings()
+                    r = requests.get(url=url, params=param, proxies=proxies, verify=False)
+                    response = json.loads(r.content.decode("utf8"))
+                    if "data" not in response:
+                        Common.crawler_log().error("获取视频info时错误,删除该视频:{}".format(download_video_title))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
                     else:
-                        if len(data["play_info"]["items"]) > 2:
-                            download_url_up = data["play_info"]["items"][2]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
+                        data = response["data"]
+                        v_duration = data["duration"]
+                        v_play_cnt_sendtime = data["played_cnt"]
+                        v_comment_cnt = data["comment_cnt"]
+                        v_liked_cnt = data["liked_cnt"]
+                        v_shared_cnt = data["shared_cnt"]
+                        v_width = data["width"]
+                        v_height = data["height"]
+                        v_resolution = str(v_width) + "*" + str(v_height)
+                        v_send_date = data["upload_time"]
+                        v_username = data["user_info"]["nickname"].strip().replace("\n", "")
+                        v_user_cover = data["user_info"]["headimg_url"]
+                        v_video_cover = data["cover_url"]
+                        if "items" not in data["play_info"]:
+                            if len(data["play_info"]) > 2:
+                                download_url_up = data["play_info"][2]["play_url"]
+                                # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
+                            else:
+                                download_url_up = data["play_info"][0]["play_url"]
+                                # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                         else:
-                            download_url_up = data["play_info"]["items"][0]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
-
-                    # 判断基本规则
-                    if cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is True \
-                            and v_id != "" and v_title != "" and v_duration != "" \
-                            and v_play_cnt_sendtime != "" and v_comment_cnt != "" and v_liked_cnt != "" \
-                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
-                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
-                            and v_video_cover != "" and download_url_up != "":
-                        # 满足下载条件:当前时间 - 发布时间 <= 3天,播放量大于2万
-                        if int(time.time()) - int(v_send_date) <= 604800:
-                            if int(v_play_cnt_sendtime) >= 10000:
-                                Common.crawler_log().info("该视频:{}".format(
-                                    v_title) + " " + "在7天内的播放量{}>=10000".format(v_play_cnt_sendtime))
-                                # 下载封面
-                                Common.download_method("cover", v_title, v_video_cover)
-                                # 下载视频
-                                Common.download_method("video", v_title, download_url_up)
-                                # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                                with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
-                                    f_a.write(v_id + "\n")
-                                # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                with open(r"./videos/" + v_title + "/" + "info.txt",
-                                          "a", encoding="utf8") as f_a2:
-                                    f_a2.write(str(v_id) + "\n" +
-                                               str(v_title) + "\n" +
-                                               str(v_duration) + "\n" +
-                                               str(v_play_cnt_sendtime) + "\n" +
-                                               str(v_comment_cnt) + "\n" +
-                                               str(v_liked_cnt) + "\n" +
-                                               str(v_shared_cnt) + "\n" +
-                                               str(v_resolution) + "\n" +
-                                               str(v_send_date) + "\n" +
-                                               str(v_username) + "\n" +
-                                               str(v_user_cover) + "\n" +
-                                               str(download_url_up) + "\n" +
-                                               str(v_video_cover) + "\n" +
-                                               str(get_sendtime_session))
-
-                                # 上传该视频
-                                Common.crawler_log().info("开始上传视频:{}".format(v_title))
-                                Publish.upload_and_publish(env, "send_time")
-
-                                # 删除该视频在kanyikan_feeds.txt中的信息
-                                Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
-                                    lines = f1.readlines()
-                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
-                                    for line1 in lines:
-                                        if v_id in line1.split(" + ")[1]:
-                                            continue
-                                        f_w1.write(line1)
+                            if len(data["play_info"]["items"]) > 2:
+                                download_url_up = data["play_info"]["items"][2]["play_url"]
+                                # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                             else:
-                                # 删除之前保存的该视频信息
-                                Common.crawler_log().info("该视频7天播放量:{}<10000".format(
-                                    int(v_play_cnt_sendtime)) + ";" + "不满足下载规则:{}".format(v_title))
-                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                                    lines = f_r.readlines()
-                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                                    for line2 in lines:
-                                        if v_id in line2.split(" + ")[1]:
-                                            continue
-                                        f_w.write(line2)
+                                download_url_up = data["play_info"]["items"][0]["play_url"]
+                                # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
+
+                        # 判断基本规则
+                        if cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is True \
+                                and download_video_id != "" and download_video_title != "" and v_duration != "" \
+                                and v_play_cnt_sendtime != "" and v_comment_cnt != "" and v_liked_cnt != "" \
+                                and v_shared_cnt != "" and v_width != "" and v_height != "" \
+                                and v_send_date != "" and v_username != "" and v_user_cover != "" \
+                                and v_video_cover != "" and download_url_up != "":
+                            # 满足下载条件:当前时间 - 发布时间 <= 7天(604800秒),播放量大于1万
+                            if int(time.time()) - int(v_send_date) <= 604800:
+                                if int(v_play_cnt_sendtime) >= 10000:
+                                    Common.crawler_log().info("该视频:{}".format(
+                                        download_video_title) + " " + "在7天内的播放量{}>=10000".format(v_play_cnt_sendtime))
+
+                                    # 下载封面
+                                    Common.download_method("cover", download_video_title, v_video_cover)
+                                    # 下载视频
+                                    Common.download_method("video", download_video_title, download_url_up)
+                                    # 保存视频信息到 "./videos/{视频标题}/info.txt"
+                                    with open(r"./videos/" + download_video_title +
+                                              "/" + "info.txt", "a", encoding="utf8") as f_a2:
+                                        f_a2.write(str(download_video_id) + "\n" +
+                                                   str(download_video_title) + "\n" +
+                                                   str(v_duration) + "\n" +
+                                                   str(v_play_cnt_sendtime) + "\n" +
+                                                   str(v_comment_cnt) + "\n" +
+                                                   str(v_liked_cnt) + "\n" +
+                                                   str(v_shared_cnt) + "\n" +
+                                                   str(v_resolution) + "\n" +
+                                                   str(v_send_date) + "\n" +
+                                                   str(v_username) + "\n" +
+                                                   str(v_user_cover) + "\n" +
+                                                   str(download_url_up) + "\n" +
+                                                   str(v_video_cover) + "\n" +
+                                                   str(sendtime_session))
+
+                                    # 上传该视频
+                                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                                    Publish.upload_and_publish(env, "send_time")
+
+                                    # 保存视频 ID 到云文档:
+                                    # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                                    Common.crawler_log().info("保存视频ID至云文档:{}".format(download_video_title))
+                                    # 看一看+ ,视频ID工作表,插入首行
+                                    Feishu.insert_columns("20ce0c")
+                                    # 看一看+ ,视频ID工作表,首行写入数据
+                                    Feishu.update_values("20ce0c", download_video_id, "", "", "",
+                                                         "", "", "", "", "", "", "", "", "", "", "")
+
+                                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                                    Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
+                                    # 删除行或列,可选 ROWS、COLUMNS
+                                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+                                else:
+                                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                                    Common.crawler_log().info("该视频7天播放量:{}<10000".format(
+                                        int(v_play_cnt_sendtime)) + ";" + "不满足下载规则:{}".format(download_video_title))
+                                    # 删除行或列,可选 ROWS、COLUMNS
+                                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+                            else:
+                                # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                                Common.crawler_log().info("视频发布时间大于7天:{}天".format(
+                                    int((int(time.time()) - int(v_send_date)) / 86400))
+                                                          + ";" + "标题:{}".format(download_video_title))
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
                         else:
-                            Common.crawler_log().info("视频发布时间大于7天:{}天".format(
-                                int((int(time.time()) - int(v_send_date)) / 86400))
-                                                + ";" + "标题:{}".format(v_title))
-                            with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                                lines = f_r.readlines()
-                            with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                                for line2 in lines:
-                                    if v_id in line2.split(" + ")[1]:
-                                        continue
-                                    f_w.write(line2)
-                    else:
-                        Common.crawler_log().info("不满足下载规则:{}".format(v_title))
-                        with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                            lines = f_r.readlines()
-                        with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                            for line3 in lines:
-                                if v_id in line3.split(" + ")[1]:
-                                    continue
-                                f_w.write(line3)
-
-            except Exception as e:
-                Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
-                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line4 in lines:
-                        if v_id in line4.split(" + ")[1]:
-                            continue
-                        f_w.write(line4)
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                            Common.crawler_log().info("不满足下载规则:{}".format(download_video_title))
+                            # 删除行或列,可选 ROWS、COLUMNS
+                            Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+                except Exception as e:
+                    Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+
+            cls.download_sendtime_video("prod")
 
 
 if __name__ == "__main__":

+ 159 - 165
main/download_up.py

@@ -12,7 +12,6 @@
 
 import json
 import os
-import random
 import sys
 import time
 import requests
@@ -21,6 +20,7 @@ sys.path.append(os.getcwd())
 from main.common import Common
 from main.get_feeds import get_feeds
 from main.publish import Publish
+from main.feishu_lib import Feishu
 
 proxies = {"http": None, "https": None}
 
@@ -51,179 +51,173 @@ class DownloadUp:
         1.从 kanyikan_feeds.txt 中获取 videoid
         2.根据 videoid,从 videoinfo 接口,获取当前视频最新的信息
         3.根据下载规则判断,符合规则进行下载:
-            1 更新视频 ID 到 "./txt/kanyikan_videoid.txt"
+            1 更新视频 ID 到 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
             2 视频信息写入文件 "./videos/{d_title}/info.txt"
         4.上传完成:
-            1 删除该视频在 "./txt/kanyikan_feeds.txt" 中的信息
+            1 删除该视频在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 中的信息
         """
-        get_video_info_session = Common.get_session()
-        Common.crawler_log().info("获取视频info时,session:{}".format(get_video_info_session))
-        lines = Common.read_txt("kanyikan_feeds.txt")
-        for line in lines:
-            v_time = line.strip().split(" + ")[0]  # 第一次获取该视频的时间
-            v_id = line.strip().split(" + ")[1]  # 外网视频 ID
-            v_play_ctn = line.strip().split(" + ")[2]  # 播放量
-            url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-            param = {
-                "session": get_video_info_session,
-                "vid": v_id,
-                "wxaVersion": "3.9.2",
-                "channelid": "208201",
-                "scene": "32",
-                "subscene": "1089",
-                "model": "iPhone 11<iPhone12,1>14.7.1",
-                "clientVersion": "8.0.18",
-                "sharesearchid": "447665862521758270",
-                "sharesource": "-1"
-            }
-            try:
-                urllib3.disable_warnings()
-                r = requests.get(url=url, params=param, proxies=proxies, verify=False)
-                response = json.loads(r.content.decode("utf8"))
-                if "data" not in response:
-                    Common.crawler_log().error("获取视频info时,session过期,等待30秒")
-                    # 如果返回空信息,则随机睡眠 31-35 秒
-                    time.sleep(random.randint(31, 35))
-                else:
-                    data = response["data"]
-                    v_title = data["title"]
-                    v_duration = data["duration"]
-                    v_play_cnt_up = data["played_cnt"]
-                    v_comment_cnt = data["comment_cnt"]
-                    v_liked_cnt = data["liked_cnt"]
-                    v_shared_cnt = data["shared_cnt"]
-                    v_width = data["width"]
-                    v_height = data["height"]
-                    v_resolution = str(v_width) + "*" + str(v_height)
-                    v_send_date = data["upload_time"]
-                    v_username = data["user_info"]["nickname"]
-                    v_user_cover = data["user_info"]["headimg_url"]
-                    v_video_cover = data["cover_url"]
-                    if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 2:
-                            download_url_up = data["play_info"][2]["play_url"]
-                        else:
-                            download_url_up = data["play_info"][0]["play_url"]
+        if len(Feishu.get_values_batch("Y8N3Vl")) == 1:
+            pass
+        else:
+            for i in range(len(Feishu.get_values_batch("Y8N3Vl"))):
+                try:
+                    video_info_session = Common.get_session()
+                    Common.crawler_log().info("获取视频info时,session:{}".format(video_info_session))
+                    download_time = Feishu.get_values_batch("Y8N3Vl")[i+1][0]  # 第一次获取该视频的时间
+                    download_video_id = Feishu.get_values_batch("Y8N3Vl")[i+1][1]  # 外网视频 ID
+                    download_video_play_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][2]  # 播放量
+                    download_video_title = Feishu.get_values_batch("Y8N3Vl")[i+1][3]
+
+                    url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
+                    param = {
+                        "session": video_info_session,
+                        "vid": download_video_id,
+                        "wxaVersion": "3.9.2",
+                        "channelid": "208201",
+                        "scene": "32",
+                        "subscene": "1089",
+                        "model": "iPhone 11<iPhone12,1>14.7.1",
+                        "clientVersion": "8.0.18",
+                        "sharesearchid": "447665862521758270",
+                        "sharesource": "-1"
+                    }
+                    urllib3.disable_warnings()
+                    r = requests.get(url=url, params=param, proxies=proxies, verify=False)
+                    response = json.loads(r.content.decode("utf8"))
+                    if "data" not in response:
+                        Common.crawler_log().error("获取视频info时错误,删除该视频:{}".format(download_video_title))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
                     else:
-                        if len(data["play_info"]["items"]) > 2:
-                            download_url_up = data["play_info"]["items"][2]["play_url"]
+                        data = response["data"]
+                        v_duration = data["duration"]
+                        v_play_cnt_up = data["played_cnt"]
+                        v_comment_cnt = data["comment_cnt"]
+                        v_liked_cnt = data["liked_cnt"]
+                        v_shared_cnt = data["shared_cnt"]
+                        v_width = data["width"]
+                        v_height = data["height"]
+                        v_resolution = str(v_width) + "*" + str(v_height)
+                        v_send_date = data["upload_time"]
+                        v_username = data["user_info"]["nickname"].strip().replace("\n", "")
+                        v_user_cover = data["user_info"]["headimg_url"]
+                        v_video_cover = data["cover_url"]
+                        if "items" not in data["play_info"]:
+                            if len(data["play_info"]) > 2:
+                                download_url_up = data["play_info"][2]["play_url"]
+                            else:
+                                download_url_up = data["play_info"][0]["play_url"]
                         else:
-                            download_url_up = data["play_info"]["items"][0]["play_url"]
+                            if len(data["play_info"]["items"]) > 2:
+                                download_url_up = data["play_info"]["items"][2]["play_url"]
+                            else:
+                                download_url_up = data["play_info"]["items"][0]["play_url"]
+    
+                        # 判断基本规则
+                        if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
+                                and download_video_id != "" and download_video_title != "" and v_duration != "" \
+                                and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
+                                and v_shared_cnt != "" and v_width != "" and v_height != "" \
+                                and v_send_date != "" and v_username != "" and v_user_cover != "" \
+                                and v_video_cover != "" and download_url_up != "":
+                            if int(time.time()) - int(download_time) < 3600:
+                                Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
+                                    int((int(int(time.time()) - int(download_time))) / 60))
+                                                          + ";{}".format(download_video_title))
+                            elif 7200 >= int(time.time()) - int(download_time) >= 3600:
+                                if int(v_play_cnt_up) - int(download_video_play_cnt) >= 1000:
+                                    Common.crawler_log().info("该视频:{}".format(
+                                        download_video_title) + " " + "在1小时内的播放量{}>=1000".format(
+                                        int(v_play_cnt_up) - int(download_video_play_cnt)))
 
-                    # 判断基本规则
-                    if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
-                            and v_id != "" and v_title != "" and v_duration != "" \
-                            and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
-                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
-                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
-                            and v_video_cover != "" and download_url_up != "":
-                        if int(time.time()) - int(v_time) < 3600:
-                            Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
-                                int((int(int(time.time()) - int(v_time))) / 60)) + ";{}".format(v_title))
-                        elif 7200 >= int(time.time()) - int(v_time) >= 3600:
-                            if int(v_play_cnt_up) - int(v_play_ctn) >= 1000:
-                                Common.crawler_log().info("该视频:{}".format(
-                                    v_title) + " " + "在1小时内的播放量{}>=1000".format(int(v_play_cnt_up) - int(v_play_ctn)))
-                                # 下载封面
-                                Common.download_method("cover", v_title, v_video_cover)
-                                # 下载视频
-                                Common.download_method("video", v_title, download_url_up)
-                                # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                                with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
-                                    f_a.write(v_id + "\n")
-                                # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                                with open(r"./videos/" + v_title + "/" + "info.txt",
-                                          "a", encoding="utf8") as f_a2:
-                                    f_a2.write(str(v_id) + "\n" +
-                                               str(v_title) + "\n" +
-                                               str(v_duration) + "\n" +
-                                               str(v_play_cnt_up) + "\n" +
-                                               str(v_comment_cnt) + "\n" +
-                                               str(v_liked_cnt) + "\n" +
-                                               str(v_shared_cnt) + "\n" +
-                                               str(v_resolution) + "\n" +
-                                               str(v_send_date) + "\n" +
-                                               str(v_username) + "\n" +
-                                               str(v_user_cover) + "\n" +
-                                               str(download_url_up) + "\n" +
-                                               str(v_video_cover) + "\n" +
-                                               str(get_video_info_session))
+                                    # 下载封面
+                                    Common.download_method("cover", download_video_title, v_video_cover)
+                                    # 下载视频
+                                    Common.download_method("video", download_video_title, download_url_up)
+                                    # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
+                                    with open(r"./videos/" + download_video_title
+                                              + "/" + "info.txt", "a", encoding="utf8") as f_a2:
+                                        f_a2.write(str(download_video_id) + "\n" +
+                                                   str(download_video_title) + "\n" +
+                                                   str(v_duration) + "\n" +
+                                                   str(v_play_cnt_up) + "\n" +
+                                                   str(v_comment_cnt) + "\n" +
+                                                   str(v_liked_cnt) + "\n" +
+                                                   str(v_shared_cnt) + "\n" +
+                                                   str(v_resolution) + "\n" +
+                                                   str(v_send_date) + "\n" +
+                                                   str(v_username) + "\n" +
+                                                   str(v_user_cover) + "\n" +
+                                                   str(download_url_up) + "\n" +
+                                                   str(v_video_cover) + "\n" +
+                                                   str(video_info_session))
+    
+                                    # 上传该视频
+                                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                                    Publish.upload_and_publish(env, "up")
 
-                                # 上传该视频
-                                Common.crawler_log().info("开始上传视频:{}".format(v_title))
-                                Publish.upload_and_publish(env, "up")
+                                    # 保存视频 ID 到云文档:
+                                    # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                                    Common.crawler_log().info("保存视频ID至云文档:{}".format(download_video_title))
+                                    # 看一看+ ,视频ID工作表,插入首行
+                                    Feishu.insert_columns("20ce0c")
+                                    # 看一看+ ,视频ID工作表,首行写入数据
+                                    Feishu.update_values("20ce0c", download_video_id, "", "", "",
+                                                         "", "", "", "", "", "", "", "", "", "", "")
 
-                                # 删除该视频在kanyikan_feeds.txt中的信息
-                                Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
-                                    lines = f1.readlines()
-                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
-                                    for line1 in lines:
-                                        if v_id in line1.split(" + ")[1]:
-                                            continue
-                                        f_w1.write(line1)
-                            else:
-                                # 删除之前保存的该视频信息,并把现在的信息保存进去
-                                Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
-                                    int(v_play_cnt_up) - int(v_play_ctn)) + ";"
-                                                          + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                                    lines = f_r.readlines()
-                                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                                    for line2 in lines:
-                                        if v_id in line2.split(" + ")[1]:
-                                            continue
-                                        f_w.write(line2)
-                                with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
-                                    f_a.write(str(int(time.time())) + " + "
-                                              + str(v_id) + " + "
-                                              + str(v_play_cnt_up) + " + "
-                                              + str(v_title) + " + "
-                                              + str(v_duration) + " + "
-                                              + str(v_comment_cnt) + " + "
-                                              + str(v_liked_cnt) + " + "
-                                              + str(v_shared_cnt) + " + "
-                                              + str(v_resolution) + " + "
-                                              + str(v_send_date) + " + "
-                                              + str(v_username) + " + "
-                                              + str(v_user_cover) + " + "
-                                              + str(v_video_cover) + " + "
-                                              + str(download_url_up) + " + "
-                                              + str(get_video_info_session) + "\n")
+                                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                                    Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
+                                    # 删除行或列,可选 ROWS、COLUMNS
+                                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+                                else:
+                                    # 删除之前保存的该视频信息,并把现在的信息保存进去
+                                    Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
+                                        int(v_play_cnt_up) - int(download_video_play_cnt)
+                                    ) + ";" + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
+                                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                                    Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
+                                    # 删除行或列,可选 ROWS、COLUMNS
+                                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
 
-                        elif int(time.time()) - int(v_time) > 7200:
-                            Common.crawler_log().info("距上次获取该视频时间:{}分钟。超过2小时,删除该视频".format(
-                                int((int(time.time()) - int(v_time)) / 60)) + ";" + "标题:{}".format(v_title))
-                            # 删除之前保存的该视频信息
-                            Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                            with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                                lines = f_r.readlines()
-                            with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                                for line2 in lines:
-                                    if v_id in line2.split(" + ")[1]:
-                                        continue
-                                    f_w.write(line2)
-                    else:
-                        Common.crawler_log().info("不满足下载规则:{}".format(v_title))
-                        # 删除之前保存的该视频信息
-                        Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
-                        with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                            lines = f_r.readlines()
-                        with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                            for line3 in lines:
-                                if v_id in line3.split(" + ")[1]:
-                                    continue
-                                f_w.write(line3)
-            except Exception as e:
-                Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
-                # 删除之前保存的该视频信息
-                with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line4 in lines:
-                        if v_id in line4.split(" + ")[1]:
-                            continue
-                        f_w.write(line4)
+                                    # 看一看+工作表,插入首行
+                                    print(Feishu.insert_columns("Y8N3Vl"))
+
+                                    # 获取当前时间
+                                    download_up_time = int(time.time())
+                                    # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
+                                    Feishu.update_values("Y8N3Vl",
+                                                         a1=str(download_up_time),
+                                                         b1=str(download_video_id),
+                                                         c1=str(v_play_cnt_up),
+                                                         d1=str(download_video_title),
+                                                         e1=str(v_duration),
+                                                         f1=str(v_comment_cnt),
+                                                         g1=str(v_liked_cnt),
+                                                         h1=str(v_shared_cnt),
+                                                         i1=str(v_resolution),
+                                                         j1=str(v_send_date),
+                                                         k1=str(v_username),
+                                                         l1=str(v_user_cover),
+                                                         m1=str(v_video_cover),
+                                                         n1=str(download_url_up),
+                                                         o1=str(video_info_session))
+                            elif int(time.time()) - int(download_time) > 7200:
+                                Common.crawler_log().info("距上次获取该视频时间:""{}分钟。超过2小时,删除该视频"
+                                                          .format(int((int(time.time()) - int(download_time)) / 60))
+                                                          + ";" + "标题:{}".format(download_video_title))
+                                # 删除行或列,可选 ROWS、COLUMNS
+                                Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+                        else:
+                            Common.crawler_log().info("不满足下载规则:{}".format(download_video_title))
+                            # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                            Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
+                            # 删除行或列,可选 ROWS、COLUMNS
+                            Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
+                except Exception as e:
+                    # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                    Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
 
 
 if __name__ == "__main__":

+ 261 - 0
main/feishu_lib.py

@@ -0,0 +1,261 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/5/6
import json
import os

import requests
import urllib3
+
+proxies = {"http": None, "https": None}
+
+
class Feishu:
    """
    编辑飞书云文档(电子表格)的轻量封装。

    所有方法均为 classmethod,可直接通过 Feishu.xxx() 调用;
    每次请求都会重新获取一次 tenant_access_token。
    """
    feishu_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
    spreadsheetToken = "shtcngRPoDYAi24x52j2nDuHMih"
    # 所有 HTTP 请求的统一超时时间(秒),避免网络异常时请求无限阻塞
    timeout = 10

    # 获取飞书api token
    @classmethod
    def get_token(cls):
        """
        获取飞书 api token。
        :return: tenant_access_token
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        # NOTE(review): app_id/app_secret 原先硬编码在源码中,存在泄露风险;
        # 现优先读取环境变量 FEISHU_APP_ID / FEISHU_APP_SECRET,
        # 未设置时退回原值,保持向后兼容。建议尽快改为只从配置/环境读取。
        post_data = {
            "app_id": os.getenv("FEISHU_APP_ID", "cli_a13ad2afa438d00b"),
            "app_secret": os.getenv("FEISHU_APP_SECRET", "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"),
        }

        urllib3.disable_warnings()
        response = requests.post(url=url, data=post_data, proxies=proxies,
                                 verify=False, timeout=cls.timeout)

        return response.json()["tenant_access_token"]

    # 获取表格元数据
    @classmethod
    def get_metainfo(cls):
        """
        获取表格元数据。
        :return: 接口返回的完整 json(dict)
        """
        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/metainfo"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
            "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
        }
        urllib3.disable_warnings()
        r = requests.get(url=url, headers=headers, params=params,
                         proxies=proxies, verify=False, timeout=cls.timeout)
        return json.loads(r.content.decode("utf8"))

    # 读取工作表中所有数据
    @classmethod
    def get_values_batch(cls, sheetid):
        """
        读取工作表中所有数据(含表头行)。
        :param sheetid: 工作表 id
        :return: 二维列表,每个元素为一行的单元格值
        """
        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/values_batch_get"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
            "ranges": sheetid,

            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
            # FormattedValue 计算并格式化单元格;Formula 返回公式本身;
            # UnformattedValue 计算但不格式化
            "valueRenderOption": "ToString",

            # dateTimeRenderOption=FormattedString 计算并将时间日期按其格式进行格式化
            "dateTimeRenderOption": "",

            # 返回的用户id类型,可选open_id,union_id
            "user_id_type": "open_id"
        }
        urllib3.disable_warnings()
        r = requests.get(url=url, headers=headers, params=params,
                         proxies=proxies, verify=False, timeout=cls.timeout)
        response = json.loads(r.content.decode("utf8"))
        return response["data"]["valueRanges"][0]["values"]

    # 在工作表顶部插入一行
    @classmethod
    def insert_columns(cls, sheetid):
        """
        在表头(第 1 行)之后插入一个空行,供 update_values 写入 A2:O2。
        :param sheetid: 工作表 id
        :return: 接口返回的 msg
        """
        url = ("https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"
               + cls.spreadsheetToken + "/insert_dimension_range")
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": "ROWS",  # 默认 ROWS ,可选 ROWS、COLUMNS
                "startIndex": 1,  # 开始的位置
                "endIndex": 2  # 结束的位置
            },
            "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
        }
        urllib3.disable_warnings()
        r = requests.post(url=url, headers=headers, json=body,
                          proxies=proxies, verify=False, timeout=cls.timeout)
        return r.json()["msg"]

    # 首行(A2:O2)写入数据
    @classmethod
    def update_values(cls, sheetid, a1="", b1="", c1="", d1="", e1="", f1="", g1="",
                      h1="", i1="", j1="", k1="", l1="", m1="", n1="", o1=""):
        """
        向 sheetid!A2:O2 写入一行数据(通常在 insert_columns 之后调用)。
        :param sheetid: 工作表 id
        :param a1: A2~O2 各单元格的值(a1 对应 A2,o1 对应 O2),默认空字符串
        :return: 接口返回的 msg
        """
        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/values_batch_update"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "valueRanges": [
                {
                    "range": sheetid + "!A2:O2",
                    "values": [
                        [a1, b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1]
                    ]
                },
            ],
        }
        urllib3.disable_warnings()
        r = requests.post(url=url, headers=headers, json=body,
                          proxies=proxies, verify=False, timeout=cls.timeout)
        return r.json()["msg"]

    # 读取单元格数据
    @classmethod
    def get_range_value(cls, sheetid, cell):
        """
        读取指定单元格区域内容。
        :param sheetid: 工作表 id
        :param cell: 单元格区域,如 "B8:C8"
        :return: 该区域第一行的值列表
        """
        url = ("https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"
               + cls.spreadsheetToken + "/values/" + sheetid + "!" + cell)
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        params = {
            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外)
            "valueRenderOption": "ToString",
            # dateTimeRenderOption=FormattedString 计算并对时间日期按其格式进行格式化
            "dateTimeRenderOption": "",
            # 返回的用户id类型,可选open_id,union_id
            "user_id_type": "open_id"
        }
        urllib3.disable_warnings()
        r = requests.get(url=url, headers=headers, params=params,
                         proxies=proxies, verify=False, timeout=cls.timeout)
        return r.json()["data"]["valueRange"]["values"][0]

    # 删除行或列,可选 ROWS、COLUMNS
    @classmethod
    def dimension_range(cls, sheetid, major_dimension, startindex, endindex):
        """
        删除行或列。
        :param sheetid: 工作表 id
        :param major_dimension: 默认 ROWS,可选 ROWS、COLUMNS
        :param startindex: 开始的位置
        :param endindex: 结束的位置(端点含义以飞书接口文档为准 —— TODO confirm 是否闭区间)
        :return: 接口返回的 msg
        """
        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" + cls.spreadsheetToken + "/dimension_range"
        headers = {
            "Authorization": "Bearer " + cls.get_token(),
            "Content-Type": "application/json; charset=utf-8"
        }
        body = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": major_dimension,
                "startIndex": startindex,
                "endIndex": endindex
            }
        }
        urllib3.disable_warnings()
        r = requests.delete(url=url, headers=headers, json=body,
                            proxies=proxies, verify=False, timeout=cls.timeout)
        return r.json()["msg"]
+
+
+if __name__ == "__main__":
+    feishu = Feishu()
+
+    # # 获取飞书api token
+    # feishu.get_token()
+    # # 获取表格元数据
+    # feishu.get_metainfo()
+
+    # 读取工作表中所有数据
+    print(feishu.get_values_batch("Y8N3Vl"))
+    print(len(feishu.get_values_batch("SdCHOM")))
+    for i in range(len(feishu.get_values_batch("Y8N3Vl"))):
+        videoid = feishu.get_values_batch("Y8N3Vl")[i][1]
+        if videoid == "b3":
+            # 删除行或列,可选 ROWS、COLUMNS
+            feishu.dimension_range("Y8N3Vl", "ROWS", i+1, i+1)
+            print(videoid)
+
+
+    #
+    # # 看一看+工作表,插入首行
+    # print(feishu.insert_columns("Y8N3Vl"))
+    #
+    # # 看一看+工作表,首行写入数据
+    # print(feishu.update_values("Y8N3Vl", "a1", "b1", "c1", "d1", "e1", "f1", "g1",
+    #                            "h1", "i1", "j1", "k1", "l1", "m1", "n1", "o1"))
+
+    # # 查询单元格内容
+    # print(feishu.get_range_value("Y8N3Vl", "B8:C8"))
+    #
+    # # 删除行或列,可选 ROWS、COLUMNS
+    # feishu.dimension_range("Y8N3Vl", "ROWS")
+
+    pass

+ 42 - 59
main/get_feeds.py

@@ -11,6 +11,9 @@ import sys
 import time
 import requests
 import urllib3
+
+from main.feishu_lib import Feishu
+
 sys.path.append(os.getcwd())
 from main.common import Common
 
@@ -19,15 +22,15 @@ proxies = {"http": None, "https": None}
 
 def get_feeds():
     """
-    获取视频信息后:
-        1.先在 video.txt 中去重
-        2.再从 basic.txt 中去重
-        3.添加视频信息至 basic.txt
+    1.从看一看+小程序首页推荐,获取视频列表
+    2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 中去重
+    3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 中去重
+    4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
     """
     host = "https://search.weixin.qq.com"
     url = '/cgi-bin/recwxa/recwxavideolist?'
-    get_video_list_session = Common.get_session()
-    Common.crawler_log().info("获取视频list时,session:{}".format(get_video_list_session))
+    video_list_session = Common.get_session()
+    Common.crawler_log().info("获取视频list时,session:{}".format(video_list_session))
     header = {
         "Connection": "keep-alive",
         "content-type": "application/json",
@@ -38,7 +41,7 @@ def get_feeds():
         "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
     }
     params = {
-        'session': get_video_list_session,
+        'session': video_list_session,
         "offset": 0,
         "wxaVersion": "3.9.2",
         "count": "10",
@@ -58,8 +61,8 @@ def get_feeds():
 
         if "data" not in response:
             Common.crawler_log().info("获取视频list时,session过期,随机睡眠 31-50 秒")
-            # 如果返回空信息,则随机睡眠 50-100 秒
-            time.sleep(random.randint(31, 50))
+            # 如果返回空信息,则随机睡眠 31-40 秒
+            time.sleep(random.randint(31, 40))
             get_feeds()
         elif "items" not in response["data"]:
             Common.crawler_log().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟")
@@ -82,7 +85,8 @@ def get_feeds():
                         .replace("/", "").replace("\\", "").replace("\r", "")\
                         .replace(":", "").replace("*", "").replace("?", "")\
                         .replace("?", "").replace('"', "").replace("<", "")\
-                        .replace(">", "").replace("|", "").replace(" ", "")
+                        .replace(">", "").replace("|", "").replace(" ", "")\
+                        .replace("&NBSP", "").replace(".", "。").replace(" ", "")
                     Common.crawler_log().info('视频标题:{}'.format(video_title))
                 
                     # 获取视频播放次数
@@ -176,61 +180,40 @@ def get_feeds():
                             or url == "":
                         Common.crawler_log().info("无效视频")
                     else:
-                        # 从 kanyikan_videoid.txt 去重
-                        videoids = Common.read_txt("kanyikan_videoid.txt")
-                        if video_id in [vid.strip() for vid in videoids]:
+                        # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                        if video_id in [j for i in Feishu.get_values_batch("20ce0c") for j in i]:
                             Common.crawler_log().info("该视频已下载:{}".format(video_title))
                         else:
                             Common.crawler_log().info("该视频未下载:{}".format(video_title))
-                            # 获取当前时间
-                            basic_time = int(time.time())
 
-                            # 从 kanyikan_feeds.txt 去重
-                            contents = Common.read_txt("kanyikan_feeds.txt")
-                            # 文件为空时,直接添加该视频
-                            if len(contents) == 0:
-                                Common.crawler_log().info("添加该视频信息至kanyikan_feeds.txt:{}".format(video_title))
-                                # 当前时间、视频 ID、播放量 存储到 kanyikan_feeds.txt
-                                with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
-                                    f.write(str(basic_time) + " + "
-                                            + str(video_id) + " + "
-                                            + str(video_play_cnt) + " + "
-                                            + str(video_title) + " + "
-                                            + str(video_duration) + " + "
-                                            + str(video_comment_cnt) + " + "
-                                            + str(video_liked_cnt) + " + "
-                                            + str(video_shared_cnt) + " + "
-                                            + str(video_resolution) + " + "
-                                            + str(video_send_date) + " + "
-                                            + str(video_user) + " + "
-                                            + str(video_user_cover) + " + "
-                                            + str(video_cover) + " + "
-                                            + str(url) + " + "
-                                            + Common.get_session() + "\n")
+                            # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
+                            if video_id in [j for i in Feishu.get_values_batch("Y8N3Vl") for j in i]:
+                                Common.crawler_log().info("该视频已在kanyikan_feeds_1中:{}".format(video_title))
                             else:
-                                # 文件不为空时,再做去重
-                                if video_id in [content.split(" + ")[1] for content in contents]:
-                                    Common.crawler_log().info("该视频已在kanyikan_feeds.txt中:{}".format(video_title))
-                                else:
-                                    Common.crawler_log().info("添加该视频信息至kanyikan_feeds.txt:{}".format(video_title))
-                                    # 当前时间、视频 ID、播放量 存储到 kanyikan_feeds.txt
-                                    with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf8") as f:
-                                        f.write(str(basic_time) + " + "
-                                                + str(video_id) + " + "
-                                                + str(video_play_cnt) + " + "
-                                                + str(video_title) + " + "
-                                                + str(video_duration) + " + "
-                                                + str(video_comment_cnt) + " + "
-                                                + str(video_liked_cnt) + " + "
-                                                + str(video_shared_cnt) + " + "
-                                                + str(video_resolution) + " + "
-                                                + str(video_send_date) + " + "
-                                                + str(video_user) + " + "
-                                                + str(video_user_cover) + " + "
-                                                + str(video_cover) + " + "
-                                                + str(url) + " + "
-                                                + Common.get_session() + "\n")
+                                Common.crawler_log().info("添加该视频信息至kanyikan_feeds_1:{}".format(video_title))
+
+                                # 看一看+工作表,插入首行
+                                print(Feishu.insert_columns("Y8N3Vl"))
 
+                                # 获取当前时间
+                                get_feeds_time = int(time.time())
+                                # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
+                                Feishu.update_values("Y8N3Vl",
+                                                     a1=str(get_feeds_time),
+                                                     b1=str(video_id),
+                                                     c1=str(video_play_cnt),
+                                                     d1=str(video_title),
+                                                     e1=str(video_duration),
+                                                     f1=str(video_comment_cnt),
+                                                     g1=str(video_liked_cnt),
+                                                     h1=str(video_shared_cnt),
+                                                     i1=str(video_resolution),
+                                                     j1=str(video_send_date),
+                                                     k1=str(video_user),
+                                                     l1=str(video_user_cover),
+                                                     m1=str(video_cover),
+                                                     n1=str(url),
+                                                     o1=str(video_list_session))
     except Exception as e:
         Common.crawler_log().error("获取视频 list 时异常:{}".format(e))
 

+ 0 - 6
main/run.py

@@ -39,8 +39,6 @@ class Main:
         Common.del_charles_files()
         # 删除多余日志
         Common.del_logs()
-        # 统计累计下载数量
-        Common.kanyikan_download_count()
 
     @classmethod
     def download_up_job(cls):
@@ -65,8 +63,6 @@ class Main:
         Common.del_charles_files()
         # 删除多余日志
         Common.del_logs()
-        # 统计累计下载数量
-        Common.kanyikan_download_count()
 
     @classmethod
     def download_sendtime_job(cls):
@@ -91,8 +87,6 @@ class Main:
         Common.del_charles_files()
         # 删除多余日志
         Common.del_logs()
-        # 统计累计下载数量
-        Common.kanyikan_download_count()
 
     @classmethod
     def main(cls):

Деякі файли не було показано, через те що забагато файлів було змінено