wangkun 2 years ago
parent
commit
15bef61010
8 changed files with 1358 additions and 0 deletions
  1. README.md (+38 -0)
  2. main/__init__.py (+3 -0)
  3. main/common.py (+140 -0)
  4. main/demo.py (+65 -0)
  5. main/feishu_lib.py (+310 -0)
  6. main/gzh_recommend.py (+516 -0)
  7. main/publish.py (+254 -0)
  8. main/run_gzh_recommend.py (+32 -0)

+ 38 - 0
README.md

@@ -0,0 +1,38 @@
+WeChat Official Account (公众号) crawler
+
+git:https://git.yishihui.com/Server/crawler_gzh.git
+
+feishu:https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?
+
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9
+python==3.10
+
+# Entry point:
+
+cd ./crawler
+
+python3 ./crawler_gzh/main/run_xxx.py
+
+
+# Crawling by data metrics, 2022/8/4 https://w42nne6hzg.feishu.cn/docx/doxcndwbtMudFHh7r4alaJoykke
+
+1. Task start time
+
+- Every day from 08:00 to 21:00
+
+2. Crawling rules:
+
+- Video duration longer than 1 minute and shorter than 20 minutes
+- In-site title = original title of the official-account video
+- In-site cover image = original cover image of the official-account video
+
+3. In-site intake:
+
+- 100 videos stored per day
+
+- Videos are randomly assigned to 5 virtual accounts. uid list:
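A minimal sketch of how the duration rule and the account assignment above could be expressed in code. The uid pool mirrors uids_prod_play in main/publish.py; the function names themselves are illustrative and not part of this commit:

    import random

    # uid pool of the 5 virtual accounts, as defined in main/publish.py (uids_prod_play)
    UID_POOL = [20631273, 20631274, 20631275, 20631276, 20631277]

    def meets_duration_rule(duration_seconds):
        # rule above: longer than 1 minute and shorter than 20 minutes
        return 60 <= duration_seconds <= 20 * 60

    def pick_virtual_account():
        # each stored video goes to one of the 5 virtual accounts at random
        return random.choice(UID_POOL)

    print(meets_duration_rule(90), pick_virtual_account())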

+ 3 - 0
main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/1

+ 140 - 0
main/common.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/1
+"""
+公共方法,包含:生成log / 删除log / 下载方法 / 读取文件 / 统计下载数
+"""
+from datetime import date, timedelta
+from loguru import logger
+import datetime
+import os
+import time
+import requests
+import urllib3
+
+proxies = {"http": None, "https": None}
+
+
+class Common:
+    # 统一获取当前时间 <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
+    now = datetime.datetime.now()
+    # 昨天 <class 'str'>  2022-04-13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+    # 今天 <class 'datetime.date'>  2022-04-14
+    today = date.today()
+    # 明天 <class 'str'>  2022-04-15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    # 使用 logger 模块生成日志
+    @staticmethod
+    def logger(log_type):
+        """
+        使用 logger 模块生成日志
+        """
+        # 日志路径
+        log_dir = "./logs/"
+        log_path = os.getcwd() + os.sep + log_dir
+        if not os.path.isdir(log_path):
+            os.makedirs(log_path)
+
+        # 日志文件名
+        if log_type == "recommend":
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-gzh-recommend.log'
+        else:
+            log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '-gzh.log'
+
+        # 日志不打印到控制台
+        logger.remove(handler_id=None)
+
+        # rotation="500 MB",实现每 500MB 存储一个文件
+        # rotation="12:00",实现每天 12:00 创建一个文件
+        # rotation="1 week",每周创建一个文件
+        # retention="10 days",每隔10天之后就会清理旧的日志
+        # 初始化日志
+        logger.add(log_dir + log_name, level="INFO", rotation='00:00')
+
+        return logger
+
+    # 清除日志,保留最近 6 个文件
+    @classmethod
+    def del_logs(cls, log_type):
+        """
+        清除冗余日志文件
+        :param log_type: 日志类型
+        :return: 无;仅保留 ./logs/ 下最近 6 个日志文件
+        """
+        logs_dir = "./logs/"
+        if not os.path.exists(logs_dir):
+            os.mkdir(logs_dir)
+
+        all_files = sorted(os.listdir(logs_dir))
+        all_logs = []
+        for log in all_files:
+            name = os.path.splitext(log)[-1]
+            if name == ".log":
+                all_logs.append(log)
+
+        if len(all_logs) <= 6:
+            pass
+        else:
+            for file in all_logs[:len(all_logs) - 6]:
+                os.remove(logs_dir + file)
+        cls.logger(log_type).info("清除冗余日志成功")
+
+    # 封装下载视频或封面的方法
+    @classmethod
+    def download_method(cls, log_type, text, d_name, d_url):
+        """
+        下载封面:text == "cover" ; 下载视频:text == "video"
+        需要下载的视频标题:d_name
+        视频封面,或视频播放地址:d_url
+        下载保存路径:"./videos/{d_name}/"
+        """
+        videos_dir = "./videos/"
+        if not os.path.exists(videos_dir):
+            os.mkdir(videos_dir)
+        # 首先创建一个保存该视频相关信息的文件夹
+        video_dir = "./videos/" + d_name + "/"
+        if not os.path.exists(video_dir):
+            os.mkdir(video_dir)
+
+        # 下载视频
+        if text == "video":
+            # 需要下载的视频地址
+            video_url = d_url
+            # 视频名
+            video_name = "video.mp4"
+
+            # 下载视频
+            urllib3.disable_warnings()
+            try:
+                response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
+                with open(video_dir + video_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.logger(log_type).info("==========视频下载完成==========")
+            except Exception as e:
+                cls.logger(log_type).exception("视频下载失败:{}", e)
+
+        # 下载封面
+        elif text == "cover":
+            # 需要下载的封面地址
+            cover_url = d_url
+            # 封面名
+            cover_name = "image.jpg"
+            # # 封面名
+            # cover_name = d_name + ".jpg"
+
+            # 下载封面
+            urllib3.disable_warnings()
+            try:
+                response = requests.get(cover_url, proxies=proxies, verify=False)
+                with open(video_dir + cover_name, "wb") as f:
+                    f.write(response.content)
+                cls.logger(log_type).info("==========封面下载完成==========")
+            except Exception as e:
+                cls.logger(log_type).exception("封面下载失败:{}", e)
+
+
+if __name__ == "__main__":
+    common = Common()
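For reference, the call pattern the other modules in this commit use for these helpers (see main/gzh_recommend.py). The URLs and the d_name value below are placeholders:

    from main.common import Common

    # writes an INFO line to ./logs/<date>-gzh-recommend.log
    Common.logger("recommend").info("gzh crawler started")

    # downloads a cover image and a video into ./videos/demo_video/
    Common.download_method(log_type="recommend", text="cover",
                           d_name="demo_video", d_url="https://example.com/cover.jpg")
    Common.download_method(log_type="recommend", text="video",
                           d_name="demo_video", d_url="https://example.com/video.mp4")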

The diff for this file was not shown because it is too large
+ 65 - 0
main/demo.py


+ 310 - 0
main/feishu_lib.py

@@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/1
+import json
+import requests
+import urllib3
+from main.common import Common
+proxies = {"http": None, "https": None}
+
+
+class Feishu:
+    """
+    编辑飞书云文档
+    """
+    # 看一看爬虫数据表
+    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
+    # 快手爬虫数据表
+    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
+    # 微视爬虫数据表
+    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
+    # 小年糕爬虫数据表
+    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
+    # 数据监控表
+    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
+    # 本山祝福数据表
+    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
+    # 公众号爬虫表
+    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
+
+    # 飞书路径token
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        """
+        :param crawler: 哪个爬虫
+        """
+        if crawler == "kanyikan":
+            return "shtcngRPoDYAi24x52j2nDuHMih"
+        elif crawler == "kuaishou":
+            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
+        elif crawler == "weishi":
+            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
+        elif crawler == "xiaoniangao":
+            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+        elif crawler == "monitor":
+            return "shtcnlZWYazInhf7Z60jkbLRJyd"
+        elif crawler == "bszf":
+            return "shtcnGh2rrsPYM4iVNEBO7OqWrb"
+        elif crawler == "gzh":
+            return "shtcnexNXnpDLHhARw0QdiwbYuA"
+
+    # 获取飞书api token
+    @classmethod
+    def get_token(cls, log_type):
+        """
+        获取飞书api token
+        :return:
+        """
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 飞书后台应用的 app_id
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}  # 飞书后台应用的 app_secret
+
+        try:
+            urllib3.disable_warnings()
+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+            tenant_access_token = response.json()["tenant_access_token"]
+            return tenant_access_token
+        except Exception as e:
+            Common.logger(log_type).error("获取飞书 api token 异常:{}", e)
+
+    # 获取表格元数据
+    @classmethod
+    def get_metainfo(cls, log_type, crawler):
+        """
+        获取表格元数据
+        :return:
+        """
+        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                           + cls.spreadsheettoken(crawler) + "/metainfo"
+
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+            "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            return response
+        except Exception as e:
+            Common.logger(log_type).error("获取表格元数据异常:{}", e)
+
+    # 读取工作表中所有数据
+    @classmethod
+    def get_values_batch(cls, log_type, crawler, sheetid):
+        """
+        读取工作表中所有数据
+        :param log_type: 启用哪个 log
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张表
+        :return: 所有数据
+        """
+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
+            "ranges": sheetid,
+
+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+            # valueRenderOption=FormattedValue 计算并格式化单元格;
+            # valueRenderOption=Formula单元格中含有公式时返回公式本身;
+            # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
+            "valueRenderOption": "ToString",
+
+            # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+            "dateTimeRenderOption": "",
+
+            # 返回的用户id类型,可选open_id,union_id
+            "user_id_type": "open_id"
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            response = json.loads(r.content.decode("utf8"))
+            values = response["data"]["valueRanges"][0]["values"]
+            return values
+        except Exception as e:
+            Common.logger(log_type).error("读取工作表所有数据异常:{}", e)
+
+    # 工作表,插入行或列
+    @classmethod
+    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
+        """
+        工作表插入行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param majordimension:行或者列, ROWS、COLUMNS
+        :param startindex:开始位置
+        :param endindex:结束位置
+        """
+        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "dimension": {
+                "sheetId": sheetid,
+                "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                "startIndex": startindex,  # 开始的位置
+                "endIndex": endindex  # 结束的位置
+            },
+            "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("插入行或列:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type).error("插入行或列异常:{}", e)
+
+    # 写入数据
+    @classmethod
+    def update_values(cls, log_type, crawler, sheetid, ranges, values):
+        """
+        写入数据
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param ranges:单元格范围
+        :param values:写入的具体数据,list
+        """
+        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "valueRanges": [
+                {
+                    "range": sheetid + "!" + ranges,
+                    "values": values
+                },
+            ],
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("写入数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type).error("写入数据异常:{}", e)
+
+    # 合并单元格
+    @classmethod
+    def merge_cells(cls, log_type, crawler, sheetid, ranges):
+        """
+        合并单元格
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:哪张工作表
+        :param ranges:需要合并的单元格范围
+        """
+        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                          + cls.spreadsheettoken(crawler) + "/merge_cells"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+
+        body = {
+            "range": sheetid + "!" + ranges,
+            "mergeType": "MERGE_ROWS"
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("合并单元格:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type).error("合并单元格异常:{}", e)
+
+    # 读取单元格数据
+    @classmethod
+    def get_range_value(cls, log_type, crawler, sheetid, cell):
+        """
+        读取单元格内容
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张工作表
+        :param cell: 哪个单元格
+        :return: 单元格内容
+        """
+        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+            # valueRenderOption=FormattedValue 计算并格式化单元格;
+            # valueRenderOption=Formula 单元格中含有公式时返回公式本身;
+            # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
+            "valueRenderOption": "FormattedValue",
+
+            # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+            "dateTimeRenderOption": "",
+
+            # 返回的用户id类型,可选open_id,union_id
+            "user_id_type": "open_id"
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            return r.json()["data"]["valueRange"]["values"][0]
+        except Exception as e:
+            Common.logger(log_type).error("读取单元格数据异常:{}", e)
+
+    # 删除行或列,可选 ROWS、COLUMNS
+    @classmethod
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
+        """
+        删除行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:工作表
+        :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex:开始的位置
+        :param endindex:结束的位置
+        :return:
+        """
+        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/dimension_range"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "dimension": {
+                "sheetId": sheetid,
+                "majorDimension": major_dimension,
+                "startIndex": startindex,
+                "endIndex": endindex
+            }
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type).info("删除视频数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type).error("删除视频数据异常:{}", e)
+
+
+if __name__ == "__main__":
+    feishu = Feishu()
+
+    pass
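A short usage sketch of the wrappers above, mirroring how main/gzh_recommend.py reads and writes the 公众号 feeds worksheet (sheet id "zWKFGb"). The cell range and the cell values here are placeholders:

    import time
    from main.feishu_lib import Feishu

    log_type = "recommend"

    # read every row of the feeds worksheet
    rows = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
    print("rows in feeds sheet:", len(rows))

    # insert one empty row at the top, then write placeholder values into it
    Feishu.insert_columns(log_type, "gzh", "zWKFGb", "ROWS", 1, 2)
    values = [[time.strftime("%Y/%m/%d %H:%M:%S"), "推荐榜", "demo title"]]
    Feishu.update_values(log_type, "gzh", "zWKFGb", "D2:F2", values)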

+ 516 - 0
main/gzh_recommend.py

@@ -0,0 +1,516 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/1
+# import time
+# import base64
+import json
+import os
+import time
+# import urllib.parse
+import requests
+import urllib3
+from main.common import Common
+from main.feishu_lib import Feishu
+from main.publish import Publish
+
+proxies = {"http": None, "https": None}
+
+
+class Recommend:
+    # 获取 token,保存至飞书云文档
+    @classmethod
+    def get_token(cls, log_type):
+        # charles 抓包文件保存目录
+        charles_file_dir = "../crawler-kanyikan-recommend/chlsfiles/"
+
+        if len(os.listdir(charles_file_dir)) == 1:
+            Common.logger(log_type).info("未找到chlsfile文件,等待60s")
+            time.sleep(60)
+        else:
+            try:
+                # 目标文件夹下所有文件
+                all_file = sorted(os.listdir(charles_file_dir))
+
+                # 获取到目标文件
+                old_file = all_file[-1]
+
+                # 分离文件名与扩展名
+                new_file = os.path.splitext(old_file)
+
+                # 重命名文件后缀
+                os.rename(os.path.join(charles_file_dir, old_file),
+                          os.path.join(charles_file_dir, new_file[0] + ".txt"))
+
+                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
+                    contents = json.load(f, strict=False)
+                # 定义需要返回的列表
+                request_info = []
+                for content in contents:
+                    if "mp.weixin.qq.com" in content['host']:
+                        if content["path"] == r"/mp/getappmsgext":
+                            headers = content["request"]["header"]["headers"]
+                            title = content["request"]["body"]["text"].split("title=")[-1].split("&ct=")[0]
+                            vid = content["request"]["body"]["text"].split("vid=")[-1].split("&is_pay_subscribe")[0]
+                            request_info.append(title)
+                            request_info.append(vid)
+                            for h in headers:
+                                if h["name"] == "cookie" and "pass_ticket" in h["value"]:
+                                    pass_ticket = h["value"].split("pass_ticket=")[-1]
+                                    # print(f"pass_ticket:{pass_ticket}")
+                                    request_info.append(pass_ticket)
+
+                                if h["name"] == "referer":
+                                    __biz = h["value"].split("__biz=")[-1].split("&mid=")[0]
+                                    # print(f"__biz:{__biz}")
+                                    request_info.append(__biz)
+
+                                if h["name"] == "cookie" and "appmsg_token" in h["value"]:
+                                    appmsg_token = h["value"].split("appmsg_token=")[-1]
+                                    # print(f"appmsg_token:{appmsg_token}")
+                                    request_info.append(appmsg_token)
+                                if h["name"] == "cookie" and "wap_sid2" in h["value"]:
+                                    wap_sid2 = h["value"].split("wap_sid2=")[-1]
+                                    # print(f"wap_sid2:{wap_sid2}")
+                                    request_info.append(wap_sid2)
+                            return request_info
+            except Exception as e:
+                Common.logger(log_type).error("获取session异常,30s后重试:{}", e)
+                time.sleep(30)
+                cls.get_token(log_type)
+
+    # 获取推荐列表
+    @classmethod
+    def get_recommend(cls, log_type):
+        try:
+            params = cls.get_token(log_type)
+            if params is None:
+                Common.logger(log_type).info("未获取到token等信息,30s后重试")
+                time.sleep(30)
+                cls.get_recommend(log_type)
+            else:
+                title = params[0]
+                vid = params[1]
+                __biz = params[2]
+                appmsg_token = params[3]
+                pass_ticket = params[4]
+                wap_sid2 = params[5]
+
+                url = "https://mp.weixin.qq.com/mp/getappmsgext?"
+                headers = {
+                    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
+                    "accept": "*/*",
+                    "x-requested-with": "XMLHttpRequest",
+                    "accept-language": "zh-cn",
+                    "accept-encoding": "gzip, deflate, br",
+                    "origin": "https://mp.weixin.qq.com",
+                    "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 "
+                                  "(KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.26(0x18001a29)"
+                                  " NetType/WIFI Language/zh_CN",
+                    "referer": "https://mp.weixin.qq.com/s?"
+                               "__biz=Mzg5MDY2NzY5Nw=="
+                               "&mid=2247484710"
+                               "&idx=1"
+                               "&sn=657a341da42ed071aaa4d3ce853f64f2"
+                               "&chksm=cfd852f8f8afdbeef513340dec8702433bd78137e7b4afb665d1de5014dc6837ed4dcc979684"
+                               "&sessionid=1659509075"
+                               "&channel_session_id="
+                               "&scene=136"
+                               "&subscene="
+                               "&exptype="
+                               "&reloadid=1659509075"
+                               "&reloadseq=2"
+                               "&related_video_source=10"
+                               "&ascene=1"
+                               "&devicetype=iOS14.7.1"
+                               "&version=18001a29"
+                               "&nettype=WIFI"
+                               "&abtest_cookie=AAACAA%3D%3D"
+                               "&lang=zh_CN"
+                               "&session_us=gh_7364edd0ca9f"
+                               "&fontScale=100"
+                               "&exportkey=AdT9lhjADCG9r69d1meNZ5c%3D"
+                               "&pass_ticket=" + pass_ticket +
+                               "&wx_header=3",
+                }
+                query_string = {
+                    "f": "json",
+                    "mock": "",
+                    "uin": "777",
+                    "key": "777",
+                    "pass_ticket": pass_ticket,
+                    "wxtoken": "",
+                    "devicetype": "iOS14.7.1",
+                    "clientversion": "18001a29",
+                    "__biz": __biz,
+                    "appmsg_token": appmsg_token,
+                    "x5": "0",
+                    # "f": "json"
+                }
+                cookies = {
+                    "appmsg_token": appmsg_token,
+                    "devicetype": "iOS14.7.1",
+                    "lang": "zh_CN",
+                    "pass_ticket": pass_ticket,
+                    "rewardsn": "",
+                    "version": "18001a29",
+                    "wap_sid2": wap_sid2,
+                    "wxtokenkey": "777",
+                    "wxuin": "2010747860"
+                }
+                form = {
+                    "r": "0.13440037781889225",
+                    "__biz": __biz,
+                    "appmsg_type": "9",
+                    "mid": "2247484710",
+                    "sn": "657a341da42ed071aaa4d3ce853f64f2",
+                    "idx": "1",
+                    "scene": "136",
+                    "title": title,
+                    "ct": "1654824718",
+                    "abtest_cookie": "AAACAA==",
+                    "devicetype": "iOS14.7.1",
+                    "version": "18001a29",
+                    "is_need_ticket": "0",
+                    "is_need_ad": "1",
+                    "comment_id": "0",
+                    "is_need_reward": "0",
+                    "both_ad": "0",
+                    "reward_uin_count": "0",
+                    "send_time": "",
+                    "msg_daily_idx": "1",
+                    "is_original": "0",
+                    "is_only_read": "1",
+                    "req_id": "0314yH9rphN660ejUCz1hRVD",
+                    "pass_ticket": pass_ticket,
+                    "is_temp_url": "0",
+                    "item_show_type": "5",
+                    "tmp_version": "1",
+                    "more_read_type": "0",
+                    "appmsg_like_type": "2",
+                    "related_video_sn": "",
+                    "related_video_num": "5",
+                    "vid": vid,
+                    "is_pay_subscribe": "0",
+                    "pay_subscribe_uin_count": "0",
+                    "has_red_packet_cover": "0",
+                    "album_id": "1296223588617486300",
+                    "album_video_num": "5",
+                    "cur_album_id": "",
+                    "is_public_related_video": "0",
+                    "encode_info_by_base64": "0",
+                    "exptype": ""
+                }
+                urllib3.disable_warnings()
+                response = requests.post(url=url, headers=headers, cookies=cookies, params=query_string, data=form,
+                                         verify=False)
+
+                if "related_tag_video" not in response.json():
+                    Common.logger(log_type).warning("response:{}\n", response.text)
+                elif len(response.json()["related_tag_video"]) == 0:
+                    Common.logger(log_type).warning("response:{}\n", response.text)
+                    time.sleep(10)
+                    cls.get_recommend(log_type)
+                else:
+                    feeds = response.json()["related_tag_video"]
+                    for m in range(len(feeds)):
+                        # video_title
+                        if "title" not in feeds[m]:
+                            video_title = 0
+                        else:
+                            video_title = feeds[m]["title"]
+                            # video_title = base64.b64decode(video_title).decode("utf-8")
+
+                        # video_id
+                        if "vid" not in feeds[m]:
+                            video_id = 0
+                        else:
+                            video_id = feeds[m]["vid"]
+
+                        # play_cnt
+                        if "read_num" not in feeds[m]:
+                            play_cnt = 0
+                        else:
+                            play_cnt = feeds[m]["read_num"]
+
+                        # like_cnt
+                        if "like_num" not in feeds[m]:
+                            like_cnt = 0
+                        else:
+                            like_cnt = feeds[m]["like_num"]
+
+                        # duration
+                        if "duration" not in feeds[m]:
+                            duration = 0
+                        else:
+                            duration = feeds[m]["duration"]
+
+                        # video_width / video_height
+                        if "videoWidth" not in feeds[m] or "videoHeight" not in feeds[m]:
+                            video_width = 0
+                            video_height = 0
+                        else:
+                            video_width = feeds[m]["videoWidth"]
+                            video_height = feeds[m]["videoHeight"]
+
+                        # send_time
+                        if "pubTime" not in feeds[m]:
+                            send_time = 0
+                        else:
+                            send_time = feeds[m]["pubTime"]
+
+                        # user_name
+                        if "srcDisplayName" not in feeds[m]:
+                            user_name = 0
+                        else:
+                            user_name = feeds[m]["srcDisplayName"]
+                            # user_name = base64.b64decode(user_name).decode("utf-8")
+
+                        # user_id
+                        if "srcUserName" not in feeds[m]:
+                            user_id = 0
+                        else:
+                            user_id = feeds[m]["srcUserName"]
+
+                        # head_url
+                        if "head_img_url" not in feeds[m]:
+                            head_url = 0
+                        else:
+                            head_url = feeds[m]["head_img_url"]
+
+                        # cover_url
+                        if "cover" not in feeds[m]:
+                            cover_url = 0
+                        else:
+                            cover_url = feeds[m]["cover"]
+
+                        # video_url
+                        if "url" not in feeds[m]:
+                            video_url = 0
+                        else:
+                            video_url = feeds[m]["url"]
+
+                        # 下载链接
+                        download_url = cls.get_url(log_type, video_url)
+
+                        Common.logger(log_type).info("video_title:{}", video_title)
+                        Common.logger(log_type).info("video_id:{}", video_id)
+                        Common.logger(log_type).info("play_cnt:{}", play_cnt)
+                        Common.logger(log_type).info("like_cnt:{}", like_cnt)
+                        Common.logger(log_type).info("duration:{}", duration)
+                        Common.logger(log_type).info("video_width:{}", video_width)
+                        Common.logger(log_type).info("video_height:{}", video_height)
+                        Common.logger(log_type).info("send_time:{}", send_time)
+                        Common.logger(log_type).info("user_name:{}", user_name)
+                        Common.logger(log_type).info("user_id:{}", user_id)
+                        Common.logger(log_type).info("head_url:{}", head_url)
+                        Common.logger(log_type).info("cover_url:{}", cover_url)
+                        Common.logger(log_type).info("video_url:{}", video_url)
+                        Common.logger(log_type).info("download_url:{}", download_url)
+
+                        if video_id == 0 or video_title == 0 or duration == 0 or video_url == 0:
+                            Common.logger(log_type).info("无效视频\n")
+                        elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, "gzh", "fCs3BT") for x in
+                                               y]:
+                            Common.logger(log_type).info("该视频已下载\n")
+                        elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, "gzh", "zWKFGb") for x in
+                                               y]:
+                            Common.logger(log_type).info("该视频已在feeds中\n")
+                        else:
+                            Feishu.insert_columns(log_type, "gzh", "zWKFGb", "ROWS", 1, 2)
+                            get_feeds_time = int(time.time())
+                            values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time)),
+                                       "推荐榜",
+                                       video_title,
+                                       str(video_id),
+                                       play_cnt,
+                                       like_cnt,
+                                       duration,
+                                       str(video_width) + "*" + str(video_height),
+                                       time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(send_time)),
+                                       user_name,
+                                       user_id,
+                                       head_url,
+                                       cover_url,
+                                       video_url,
+                                       download_url
+                                       ]]
+                            time.sleep(1)
+                            Feishu.update_values(log_type, "gzh", "zWKFGb", "D2:T2", values)
+                            Common.logger(log_type).info("添加至recommend_feeds成功\n")
+
+        except Exception as e:
+            Common.logger(log_type).error("get_recommend异常:{}", e)
+
+    # 获取视频下载链接
+    @classmethod
+    def get_url(cls, log_type, url):
+        try:
+            payload = {}
+            headers = {
+                'Cookie': 'rewardsn=; wxtokenkey=777'
+            }
+            urllib3.disable_warnings()
+            response = requests.get(url=url, headers=headers, data=payload, verify=False)
+            response_list = response.text.splitlines()
+            video_url_list = []
+            for m in response_list:
+                if "mpvideo.qpic.cn" in m:
+                    video_url = m.split("url: '")[1].split("',")[0].replace(r"\x26amp;", "&")
+                    video_url_list.append(video_url)
+            video_url = video_url_list[0]
+            return video_url
+        except Exception as e:
+            Common.logger(log_type).error("get_url异常:{}", e)
+
+    # 下载/上传
+    @classmethod
+    def download_publish(cls, log_type, env):
+        try:
+            recommend_feeds_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
+            for i in range(1, len(recommend_feeds_sheet)):
+                download_video_title = recommend_feeds_sheet[i][5]
+                download_video_id = recommend_feeds_sheet[i][6]
+                download_video_play_cnt = recommend_feeds_sheet[i][7]
+                download_video_like_cnt = recommend_feeds_sheet[i][8]
+                download_video_duration = recommend_feeds_sheet[i][9]
+                download_width_height = recommend_feeds_sheet[i][10]
+                download_video_send_time = recommend_feeds_sheet[i][11]
+                download_user_name = recommend_feeds_sheet[i][12]
+                download_user_id = recommend_feeds_sheet[i][13]
+                download_head_url = recommend_feeds_sheet[i][14]
+                download_cover_url = recommend_feeds_sheet[i][15]
+                download_video_url = recommend_feeds_sheet[i][17]
+                download_video_comment_cnt = 0
+                download_video_share_cnt = 0
+
+                Common.logger(log_type).info("正在判断第{}行", i + 1)
+                Common.logger(log_type).info("download_video_title:{}", download_video_title)
+                Common.logger(log_type).info("download_video_id:{}", download_video_id)
+                Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
+                Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
+                Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
+                Common.logger(log_type).info("download_video_url:{}\n", download_video_url)
+                # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
+                # Common.logger(log_type).info("download_width_height:{}", download_width_height)
+                # Common.logger(log_type).info("download_user_name:{}", download_user_name)
+                # Common.logger(log_type).info("download_user_id:{}", download_user_id)
+                # Common.logger(log_type).info("download_head_url:{}", download_head_url)
+                # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
+
+                # 过滤空行
+                if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
+                    Common.logger(log_type).warning("空行,略过\n")
+                # # 过滤敏感词
+                # elif any(word if word in download_video_title else False for word in
+                #          cls.sensitive_words(log_type)) is True:
+                #     Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
+                #     Common.logger(log_type).info("视频已中敏感词,删除成功\n")
+                #     return
+                # # 下载规则
+                # elif cls.download_rule(download_video_share_cnt, download_video_play_cnt) is False:
+                #     Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
+                #     Common.logger(log_type).info("不满足下载规则,删除成功\n")
+                #     return
+                # 时长小于 60s,删除
+                elif int(download_video_duration) < 60:
+                    Feishu.dimension_range(log_type, "gzh", "zWKFGb", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("时长{}<60,删除成功\n", download_video_duration)
+                    return
+                # 已下载视频表去重
+                elif str(download_video_id) in [n for m in Feishu.get_values_batch(log_type, "gzh", "fCs3BT")
+                                                for n in m]:
+                    Feishu.dimension_range(log_type, "gzh", "zWKFGb", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("该视频在公众号中已下载,删除成功\n")
+                    return
+                # 看一看已下载表去重
+                elif str(download_video_id) in [n for m in Feishu.get_values_batch(log_type, "kanyikan", "20ce0c")
+                                                for n in m]:
+                    Feishu.dimension_range(log_type, "gzh", "zWKFGb", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("该视频在看一看中已下载,删除成功\n")
+                    return
+                else:
+                    # 下载封面
+                    Common.download_method(log_type=log_type, text="cover",
+                                           d_name=str(download_video_title), d_url=str(download_cover_url))
+                    # 下载视频
+                    Common.download_method(log_type=log_type, text="video",
+                                           d_name=str(download_video_title), d_url=str(download_video_url))
+                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                    with open("./videos/" + download_video_title + "/" + "info.txt",
+                              "a", encoding="UTF-8") as f_a:
+                        f_a.write(str(download_video_id) + "\n" +
+                                  str(download_video_title) + "\n" +
+                                  str(download_video_duration) + "\n" +
+                                  str(download_video_play_cnt) + "\n" +
+                                  str(download_video_comment_cnt) + "\n" +
+                                  str(download_video_like_cnt) + "\n" +
+                                  str(download_video_share_cnt) + "\n" +
+                                  str(download_width_height) + "\n" +
+                                  str(int(time.mktime(
+                                      time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
+                                  str(download_user_name) + "\n" +
+                                  str(download_head_url) + "\n" +
+                                  str(download_video_url) + "\n" +
+                                  str(download_cover_url) + "\n" +
+                                  "gzh")
+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
+
+                    # 上传视频
+                    Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
+                    our_video_id = Publish.upload_and_publish(log_type, env, "play")
+                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
+                    Common.logger(log_type).info("视频上传完成:{}", download_video_title)
+
+                    # 保存视频 ID 到云文档
+                    Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
+                    # 视频ID工作表,插入首行
+                    Feishu.insert_columns(log_type, "gzh", "fCs3BT", "ROWS", 1, 2)
+                    # 视频ID工作表,首行写入数据
+                    upload_time = int(time.time())
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                               "推荐榜",
+                               str(download_video_title),
+                               str(download_video_id),
+                               our_video_link,
+                               download_video_play_cnt,
+                               download_video_like_cnt,
+                               download_video_duration,
+                               str(download_width_height),
+                               str(download_video_send_time),
+                               str(download_user_name),
+                               str(download_user_id),
+                               str(download_head_url),
+                               str(download_cover_url),
+                               str(download_video_url)]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, "gzh", "fCs3BT", "D2:W2", values)
+
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    Feishu.dimension_range(log_type, "gzh", "zWKFGb", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
+                    return
+
+        except Exception as e:
+            Common.logger(log_type).error("download_publish异常:{}", e)
+
+    # 执行下载/上传
+    @classmethod
+    def run_download_publish(cls, log_type, env):
+        try:
+            while True:
+                recommend_feeds_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
+                if len(recommend_feeds_sheet) == 1:
+                    Common.logger(log_type).info("下载/上传完成")
+                    break
+                else:
+                    cls.download_publish(log_type, env)
+        except Exception as e:
+            Common.logger(log_type).error("run_download_publish异常:{}", e)
+
+
+if __name__ == "__main__":
+    Recommend.get_recommend("recommend")
+    # Recommend.download_publish("recommend")
+    # Recommend.run_download_publish("recommend", "dev")
+    # token = Recommend.get_token("recommend")
+    # print(token)
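A typical dev-environment pass over the whole pipeline, mirroring the commented-out calls above. It assumes a fresh Charles capture file already sits in ../crawler-kanyikan-recommend/chlsfiles/, otherwise get_token keeps waiting:

    from main.gzh_recommend import Recommend

    # 1. pull the recommend feed and append new videos to the feeds worksheet
    Recommend.get_recommend("recommend")

    # 2. download everything left in the feeds worksheet and publish it to the dev environment
    Recommend.run_download_publish("recommend", "dev")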

+ 254 - 0
main/publish.py

@@ -0,0 +1,254 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/1
+import json
+import os
+import random
+import time
+
+import oss2
+import requests
+import urllib3
+from main.common import Common
+
+proxies = {"http": None, "https": None}
+
+
+class Publish:
+    @classmethod
+    def publish_video_dev(cls, log_type, request_data):
+        """
+        loginUid  站内uid (随机)
+        appType  默认:888888
+        crawlerSrcId   站外视频ID
+        crawlerSrcCode   渠道(自定义 KYK)
+        crawlerSrcPublishTimestamp  视频原发布时间
+        crawlerTaskTimestamp   爬虫创建时间(可以是当前时间)
+        videoPath  视频oss地址
+        coverImgPath  视频封面oss地址
+        title  标题
+        totalTime  视频时长
+        viewStatus  视频的有效状态 默认1
+        versionCode  版本 默认1
+        :return:
+        """
+        # Common.logger().info('publish request data: {}'.format(request_data))
+        result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
+        if result['code'] != 0:
+            Common.logger(log_type).error('publish failure msg = {}'.format(result['msg']))
+        else:
+            Common.logger(log_type).info('publish success crawlerSrcId = {}'.format(request_data['crawlerSrcId']))
+        return video_id
+
+    @classmethod
+    def publish_video_prod(cls, log_type, request_data):
+        """
+        loginUid  站内uid (随机)
+        appType  默认:888888
+        crawlerSrcId   站外视频ID
+        crawlerSrcCode   渠道(自定义 KYK)
+        crawlerSrcPublishTimestamp  视频原发布时间
+        crawlerTaskTimestamp   爬虫创建时间(可以是当前时间)
+        videoPath  视频oss地址
+        coverImgPath  视频封面oss地址
+        title  标题
+        totalTime  视频时长
+        viewStatus  视频的有效状态 默认1
+        versionCode  版本 默认1
+        :return:
+        """
+        result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
+        # Common.logger(log_type).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        # Common.logger(log_type).info('video_id: {}'.format(video_id))
+        if result['code'] != 0:
+            Common.logger(log_type).error('publish failure msg = {}'.format(result['msg']))
+        else:
+            Common.logger(log_type).info('publish success crawlerSrcId = {}'.format(request_data['crawlerSrcId']))
+        return video_id
+
+    @classmethod
+    def request_post(cls, request_url, request_data):
+        """
+        post 请求 HTTP接口
+        :param request_url: 接口URL
+        :param request_data: 请求参数
+        :return: res_data json格式
+        """
+        urllib3.disable_warnings()
+        response = requests.post(url=request_url, data=request_data, proxies=proxies, verify=False)
+        if response.status_code == 200:
+            res_data = json.loads(response.text)
+            return res_data
+
+    # 以下代码展示了基本的文件上传、下载、罗列、删除用法。
+
+    # 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
+    # 通过环境变量获取,或者把诸如“<你的AccessKeyId>”替换成真实的AccessKeyId等。
+    #
+    # 以杭州区域为例,Endpoint可以是:
+    #   http://oss-cn-hangzhou.aliyuncs.com
+    #   https://oss-cn-hangzhou.aliyuncs.com
+    # 分别以HTTP、HTTPS协议访问。
+    access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm')
+    access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon')
+    bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket')
+    # endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com')
+    endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
+
+    # 确认上面的参数都填写正确了
+    for param in (access_key_id, access_key_secret, bucket_name, endpoint):
+        assert '<' not in param, '请设置参数:' + param
+
+    # 创建Bucket对象,所有Object相关的接口都可以通过Bucket对象来进行
+    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
+
+    """
+    处理流程:
+    1. 定时(每天凌晨1点执行一次)循环files文件下的内容 结构:files -> 视频文件夹 -> 视频文件 + 封面图 + 基本信息
+    2. 视频文件和封面上传到oss
+    - 视频文件oss目录  longvideo/crawler_local/video/prod/文件名
+    - 视频封面oss目录  longvideo/crawler_local/image/prod/文件名
+    3. 发布视频
+    - 读取 基本信息 调用发布接口
+    """
+    # env 日期20220225 文件名
+    oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}'
+    oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
+
+    @classmethod
+    def put_file(cls, log_type, oss_file, local_file):
+        cls.bucket.put_object_from_file(oss_file, local_file)
+        Common.logger(log_type).info("put oss file = {}, local file = {} success".format(oss_file, local_file))
+
+    # 清除本地文件
+    @classmethod
+    def remove_local_file(cls, log_type, local_file):
+        os.remove(local_file)
+        Common.logger(log_type).info("remove local file = {} success".format(local_file))
+
+    # 清除本地文件夹
+    @classmethod
+    def remove_local_file_dir(cls, log_type, local_file):
+        os.rmdir(local_file)
+        Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
+
+    local_file_path = './videos'
+    video_file = 'video'
+    image_file = 'image'
+    info_file = 'info'
+    uids_dev_up = [6267140]
+    uids_dev_play = [6267141]
+    uids_prod_up = [20631273, 20631274, 20631275, 20631276, 20631277]
+    uids_prod_play = [20631273, 20631274, 20631275, 20631276, 20631277]
+
+    @classmethod
+    def upload_and_publish(cls, log_type, env, job):
+        """
+        上传视频到 oss
+        :param log_type: 选择的 log
+        :param env: 测试环境:dev,正式环境:prod
+        :param job: 上升榜:up,播放量:play
+        """
+        Common.logger(log_type).info("upload_and_publish starting...")
+        today = time.strftime("%Y%m%d", time.localtime())
+        # videos 目录下的所有视频文件夹
+        files = os.listdir(cls.local_file_path)
+        for f in files:
+            try:
+                # 单个视频文件夹
+                fi_d = os.path.join(cls.local_file_path, f)
+                # 确认为视频文件夹
+                if os.path.isdir(fi_d):
+                    Common.logger(log_type).info('dir = {}'.format(fi_d))
+                    # 列出所有视频文件夹
+                    dir_files = os.listdir(fi_d)
+                    data = {'appType': '888888',
+                            'crawlerSrcCode': 'GONGZHONGHAO',
+                            'viewStatus': '1',
+                            'versionCode': '1'}
+                    now_timestamp = int(round(time.time() * 1000))
+                    data['crawlerTaskTimestamp'] = str(now_timestamp)
+                    global uid
+                    if env == "dev" and job == "up":
+                        uid = str(random.choice(cls.uids_dev_up))
+                    elif env == "dev" and job == "play":
+                        uid = str(random.choice(cls.uids_dev_play))
+                    elif env == "prod" and job == "up":
+                        uid = str(random.choice(cls.uids_prod_up))
+                    elif env == "prod" and job == "play":
+                        uid = str(random.choice(cls.uids_prod_play))
+                    data['loginUid'] = uid
+                    # 单个视频文件夹下的所有视频文件
+                    for fi in dir_files:
+                        # 视频文件夹下的所有文件路径
+                        fi_path = os.path.join(fi_d, fi)
+                        Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
+                        # 读取 info.txt,赋值给 data
+                        if cls.info_file in fi:
+                            f = open(fi_path, "r", encoding="UTF-8")
+                            # 读取数据 数据准确性写入的时候保证 读取暂不处理
+                            for i in range(14):
+                                line = f.readline()
+                                line = line.replace('\n', '')
+                                if line is not None and len(line) != 0 and not line.isspace():
+                                    # Common.logger(log_type).info("line = {}".format(line))
+                                    if i == 0:
+                                        data['crawlerSrcId'] = line
+                                    elif i == 1:
+                                        data['title'] = line
+                                    elif i == 2:
+                                        data['totalTime'] = line
+                                    elif i == 8:
+                                        data['crawlerSrcPublishTimestamp'] = line
+                                else:
+                                    Common.logger(log_type).warning("{} line is None".format(fi_path))
+                            f.close()
+                            # remove info.txt
+                            cls.remove_local_file(log_type, fi_path)
+                    # 刷新数据
+                    dir_files = os.listdir(fi_d)
+                    for fi in dir_files:
+                        fi_path = os.path.join(fi_d, fi)
+                        # Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
+                        # 上传oss
+                        if cls.video_file in fi:
+                            global oss_video_file
+                            if env == "dev":
+                                oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
+                            elif env == "prod":
+                                oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
+                            Common.logger(log_type).info("oss_video_file = {}".format(oss_video_file))
+                            cls.put_file(log_type, oss_video_file, fi_path)
+                            data['videoPath'] = oss_video_file
+                            Common.logger(log_type).info("videoPath = {}".format(oss_video_file))
+                        elif cls.image_file in fi:
+                            global oss_image_file
+                            if env == "dev":
+                                oss_image_file = cls.oss_file_path_image.format("dev", today, data['crawlerSrcId'])
+                            elif env == "prod":
+                                oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
+                            Common.logger(log_type).info("oss_image_file = {}".format(oss_image_file))
+                            cls.put_file(log_type, oss_image_file, fi_path)
+                            data['coverImgPath'] = oss_image_file
+                            Common.logger(log_type).info("coverImgPath = {}".format(oss_image_file))
+                        # 全部remove
+                        cls.remove_local_file(log_type, fi_path)
+
+                    # 发布
+                    if env == "dev":
+                        video_id = cls.publish_video_dev(log_type, data)
+                    elif env == "prod":
+                        video_id = cls.publish_video_prod(log_type, data)
+                    else:
+                        video_id = cls.publish_video_dev(log_type, data)
+                    cls.remove_local_file_dir(log_type, fi_d)
+                    return video_id
+
+                else:
+                    Common.logger(log_type).error('file not a dir = {}'.format(fi_d))
+            except Exception as e:
+                Common.logger(log_type).exception('upload_and_publish error: {}', e)
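For reference, the request body that publish_video_dev / publish_video_prod expect, assembled the same way upload_and_publish builds it from info.txt. Every field value below is an illustrative placeholder:

    import random
    import time
    from main.publish import Publish

    data = {
        "appType": "888888",
        "crawlerSrcCode": "GONGZHONGHAO",
        "viewStatus": "1",
        "versionCode": "1",
        "crawlerTaskTimestamp": str(int(round(time.time() * 1000))),
        "loginUid": str(random.choice(Publish.uids_dev_play)),
        "crawlerSrcId": "demo-video-id",
        "title": "demo title",
        "totalTime": "120",
        "crawlerSrcPublishTimestamp": "1659500000",
        "videoPath": "longvideo/crawler_local/video/dev/20220804/demo-video-id",
        "coverImgPath": "longvideo/crawler_local/image/dev/20220804/demo-video-id",
    }
    video_id = Publish.publish_video_dev("recommend", data)
    print("published video id:", video_id)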

+ 32 - 0
main/run_gzh_recommend.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/8/3
+import datetime
+import os
+import random
+import sys
+import time
+
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.gzh_recommend import Recommend
+
+
+class Main:
+    @classmethod
+    def main(cls):
+        while True:
+            if 21 >= datetime.datetime.now().hour >= 8:
+                # 获取列表
+                Recommend.get_recommend("recommend")
+                # 下载/上传
+                Recommend.run_download_publish("recommend", "prod")
+                # 清除日志
+                Common.del_logs("recommend")
+                # 随机休眠 10 - 20s
+                Common.logger("recommend").info("随机休眠 10 - 20s")
+                time.sleep(random.randint(10, 20))
+
+
+if __name__ == "__main__":
+    Main.main()

Some files were not shown because too many files changed in this commit