Ver Fonte

新增需求:2022-6-17

wangkun há 3 anos
pai
commit
f1c897fcac
3 ficheiros alterados com 264 adições e 1 exclusão
  1. 9 1
      README.md
  2. 226 0
      main/download_play_sendtime.py
  3. 29 0
      main/run_download_play_sendtime.py

+ 9 - 1
README.md

@@ -8,7 +8,15 @@ urllib3==1.26.9
 
 执行入口:
 cd ./crawler-kanyikan-Windows
-python3 main/run.py
+python3 main/run_XXX.py
+
+==========2022/6/17===========
+(recommend_feeds)0-24点
+- 7日内播放大于2万
+- 或播放大于15万
+- 新增:download_play_sendtime.py
+- 执行入口:run_download_play_sendtime.py
+
 
 ==========2022/6/16===========
 - 1.凌晨0点-15点      3日内播放大于2万 爬取

+ 226 - 0
main/download_play_sendtime.py

@@ -0,0 +1,226 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/6/17
+import json
+import os
+import sys
+import time
+import requests
+import urllib3
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.get_feeds import get_feeds
+from main.publish import Publish
+from main.feishu_lib import Feishu
+
+# Proxy mapping handed to requests.get() in this module; both schemes are set
+# to None (presumably to bypass any system-wide proxy — confirm).
+proxies = {"http": None, "https": None}
+
+
+class DownloadPlaySendtime:
+
+    # 抓取基础规则,写入recommend_feeds表
+    @staticmethod
+    def send_time_rule(send_time_width, send_time_height, send_time_duration, send_time_share_cnt):
+        """
+        1.分辨率,宽或者高 >= 720 or == 0
+        2.时长 >= 40s
+        3.视频播放量 >= 0
+        """
+        if int(send_time_width) >= 720 or int(send_time_height) >= 720:
+            if int(send_time_duration) >= 40:
+                if int(send_time_share_cnt) > 0:
+                    return True
+                else:
+                    return False
+            else:
+                return False
+        else:
+            return False
+
+    @classmethod
+    def download_play_sendtime(cls, env):
+        """
+        - 7日内播放大于2万
+        - 或播放大于15万
+        :param env: 测试环境:dev;正式环境:prod
+        :return: 下载并上传视频
+        """
+        try:
+            for i in range(1, len(Feishu.get_values_batch("SdCHOM"))+1):
+                time.sleep(1)
+
+                sendtime_session = Common.get_session()
+                # Common.logger().info("获取视频info时,session:{}", sendtime_session)
+                download_video_id = Feishu.get_values_batch("SdCHOM")[i][2]
+                download_video_title = Feishu.get_values_batch("SdCHOM")[i][3]
+                url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
+                param = {
+                    "session": sendtime_session,
+                    "vid": download_video_id,
+                    "wxaVersion": "3.9.2",
+                    "channelid": "208201",
+                    "scene": "32",
+                    "subscene": "1089",
+                    "model": "iPhone 11<iPhone12,1>14.7.1",
+                    "clientVersion": "8.0.18",
+                    "sharesearchid": "447665862521758270",
+                    "sharesource": "-1"
+                }
+                urllib3.disable_warnings()
+                r = requests.get(url=url, params=param, proxies=proxies, verify=False)
+                response = json.loads(r.content.decode("utf8"))
+                if "data" not in response:
+                    Common.logger().error("获取视频info时错误,删除该视频:{}", download_video_title)
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                else:
+                    data = response["data"]
+                    v_duration = data["duration"]
+                    v_play_cnt_sendtime = data["played_cnt"]
+                    v_comment_cnt = data["comment_cnt"]
+                    v_liked_cnt = data["liked_cnt"]
+                    v_shared_cnt = data["shared_cnt"]
+                    v_width = data["width"]
+                    v_height = data["height"]
+                    v_resolution = str(v_width) + "*" + str(v_height)
+                    v_send_date = data["upload_time"]
+                    v_username = data["user_info"]["nickname"].strip().replace("\n", "")
+                    v_user_id = data["openid"]
+                    v_user_cover = data["user_info"]["headimg_url"]
+                    v_video_cover = data["cover_url"]
+                    if "items" not in data["play_info"]:
+                        if len(data["play_info"]) > 2:
+                            download_url_sendtime = data["play_info"][2]["play_url"]
+                        else:
+                            download_url_sendtime = data["play_info"][0]["play_url"]
+                    else:
+                        if len(data["play_info"]["items"]) > 2:
+                            download_url_sendtime = data["play_info"]["items"][2]["play_url"]
+                        else:
+                            download_url_sendtime = data["play_info"]["items"][0]["play_url"]
+
+                    Common.logger().info("正在判断第{}行,视频:{}", i, download_video_title)
+
+                    # 判断无效视频
+                    if download_video_id == "" \
+                            or download_video_id is None\
+                            and download_video_title == ""\
+                            or download_video_title is None\
+                            and v_duration == "" \
+                            and v_play_cnt_sendtime == ""\
+                            and v_send_date == ""\
+                            and v_user_cover == "" \
+                            and v_video_cover == ""\
+                            and download_url_sendtime == "":
+                        Common.logger().info("无效视频,删除该视频信息:{}", download_video_title)
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+                    # 抓取基础规则
+                    elif cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is False:
+                        Common.logger().info("不满足发布时间榜下载规则,删除该视频信息:{}", download_video_title)
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+                    # 不满足规则:发布时间 > 7 天 and 播放量 < 150000
+                    elif int(time.time()) - int(v_send_date) > 604800 and int(v_play_cnt_sendtime) < 150000:
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Common.logger().info("播放量:{} < 150000", int(v_play_cnt_sendtime))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+                    # 不满足规则:发布时间 <= 7 天 and 播放量 < 20000
+                    elif int(time.time()) - int(v_send_date) <= 604800 and int(v_play_cnt_sendtime) < 20000:
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Common.logger().info("视频7天播放量:{} < 20000", int(v_play_cnt_sendtime))
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+                    elif download_video_id in [j for m in Feishu.get_values_batch("20ce0c") for j in m]:
+                        Common.logger().info("视频已下载,删除该视频信息:{}", download_video_title)
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+                    else:
+                        Common.logger().info("开始下载视频:{}", download_video_title)
+
+                        # 下载封面
+                        Common.download_method("cover", download_video_title, v_video_cover)
+                        # 下载视频
+                        Common.download_method("video", download_video_title, download_url_sendtime)
+                        # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
+                        with open(r"./videos/" + download_video_title +
+                                  "/" + "info.txt", "a", encoding="utf8") as f_a2:
+                            f_a2.write(str(download_video_id) + "\n" +
+                                       str(download_video_title) + "\n" +
+                                       str(v_duration) + "\n" +
+                                       str(v_play_cnt_sendtime) + "\n" +
+                                       str(v_comment_cnt) + "\n" +
+                                       str(v_liked_cnt) + "\n" +
+                                       str(v_shared_cnt) + "\n" +
+                                       str(v_resolution) + "\n" +
+                                       str(v_send_date) + "\n" +
+                                       str(v_username) + "\n" +
+                                       str(v_user_cover) + "\n" +
+                                       str(download_url_sendtime) + "\n" +
+                                       str(v_video_cover) + "\n" +
+                                       str(sendtime_session))
+                        Common.logger().info("==========视频信息已保存至info.txt==========")
+
+                        # 上传该视频
+                        Common.logger().info("开始上传视频:{}", download_video_title)
+                        Publish.upload_and_publish(env, "send_time")
+
+                        # 保存视频 ID 到云文档:
+                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
+                        Common.logger().info("保存视频ID至云文档:{}", download_video_title)
+                        # 看一看+ ,视频ID工作表,插入首行
+                        Feishu.insert_columns("20ce0c", "rows", 1, 2)
+                        # 看一看+ ,视频ID工作表,首行写入数据
+                        upload_time = int(time.time())
+                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                                   "发布时间榜",
+                                   str(download_video_id),
+                                   str(download_video_title),
+                                   v_play_cnt_sendtime,
+                                   v_comment_cnt,
+                                   v_liked_cnt,
+                                   v_shared_cnt,
+                                   v_duration,
+                                   v_resolution,
+                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
+                                   str(v_username),
+                                   str(v_user_id),
+                                   str(v_user_cover),
+                                   str(v_video_cover),
+                                   str(download_url_sendtime)]]
+                        time.sleep(1)
+                        Feishu.update_values("20ce0c", "A2:Q2", values)
+
+                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
+                        Common.logger().info("从云文档删除该视频信息:{}", download_video_title)
+                        # 删除行或列,可选 ROWS、COLUMNS
+                        Feishu.dimension_range("SdCHOM", "ROWS", i + 1, i + 1)
+                        return
+
+        except Exception as e:
+            Common.logger().error("获取视频info异常:{}", e)
+            Feishu.dimension_range("SdCHOM", "ROWS", 2, 2)
+
+    # 执行上传及下载
+    @classmethod
+    def run_download_play_sendtime(cls):
+        try:
+            while True:
+                if len(Feishu.get_values_batch("SdCHOM")) == 1:
+                    break
+                else:
+                    cls.download_play_sendtime("prod")
+        except Exception as e:
+            Common.logger().error("执行上传及下载异常:{}", e)
+
+
+if __name__ == "__main__":
+    download_sendtime = DownloadPlaySendtime()
+    get_feeds()
+    download_sendtime.download_play_sendtime("prod")

+ 29 - 0
main/run_download_play_sendtime.py

@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/6/17
+import datetime
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.get_feeds import get_feeds
+from main.download_play_sendtime import DownloadPlaySendtime
+
+
+class Main:
+    @classmethod
+    def prod_job(cls):
+        while True:
+            prod_job_time = datetime.datetime.now()
+            if prod_job_time.hour == 0 and prod_job_time.minute <= 10:
+                Common.del_logs()
+                time.sleep(60)
+            else:
+                get_feeds()
+                DownloadPlaySendtime.run_download_play_sendtime()
+
+
+if __name__ == "__main__":
+    main = Main()
+    main.prod_job()