zhangyong 10 months ago
parent
commit
38a7374004

+ 5 - 0
common/__init__.py

@@ -0,0 +1,5 @@
+from .common_log import Common
+from .aliyun_oss import Oss
+from .feishu_form import Material
+from .feishu_utils import Feishu
+from .mysql_db import MysqlHelper
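
A minimal usage sketch of this re-exported surface, mirroring how video_job.py and video_prep.py consume the package (FFmpeg, PQ, and sqlCollect are imported from their own modules rather than re-exported):

    from common import Common, Oss, Material, Feishu, MysqlHelper

    logger = Common.logger("demo")                       # writes ./demo/logs/demo-<date>.log
    rows = Feishu.get_values_batch("summary", "f7f695")  # raw sheet rows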

+ 84 - 0
common/aliyun_oss.py

@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/26
+from datetime import datetime
+from typing import Dict, Any,  Optional
+
+import oss2
+import requests
+
+# OSS_BUCKET_PATH = "douyin"
+OSS_ACCESS_KEY_ID = "LTAIP6x1l3DXfSxm"
+OSS_ACCESS_KEY_SECRET = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
+OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou-internal.aliyuncs.com"  # internal endpoint
+# OSS_BUCKET_ENDPOINT = "oss-cn-hangzhou.aliyuncs.com"  # public endpoint
+OSS_BUCKET_NAME = "art-crawler"
+
+
+class Oss:
+    # Upload crawled videos to the art-crawler bucket
+    @classmethod
+    def video_sync_upload_oss(cls, src_url: str,
+                        video_id: str,
+                        account_id: str,
+                        oss_bucket_path: str,
+                        referer: Optional[str] = None) -> Dict[str, Any]:
+        headers = {
+            'Accept': '*/*',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Pragma': 'no-cache',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
+                          'Chrome/117.0.0.0 Safari/537.36',
+        }
+        if referer:
+            headers.update({'Referer': referer})
+        response = requests.get(url=src_url, headers=headers)
+        file_content = response.content
+        content_type = response.headers.get('Content-Type', 'application/octet-stream')
+
+        oss_object_key = f'{oss_bucket_path}/{account_id}/{video_id}'
+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
+        bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, OSS_BUCKET_NAME)
+        response = bucket.put_object(oss_object_key, file_content, headers={'Content-Type': content_type})
+
+        if 'Content-Length' in response.headers:
+            return {
+                'status': response.status,
+                'oss_object_key': oss_object_key}
+        raise AssertionError(f'OSS上传失败,请求ID: \n{response.headers["x-oss-request-id"]}')
+
+    """
+    视频发送到art-pubbucket
+    """
+    @classmethod
+    def stitching_sync_upload_oss(cls, src_url: str,
+                        video_id: str) -> Dict[str, Any]:
+        oss_object_key = f'guanggao/video/{video_id}'
+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
+        bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, "art-pubbucket")
+        response = bucket.put_object_from_file(oss_object_key, src_url)
+
+        if 'Content-Length' in response.headers:
+            return {
+                'status': response.status,
+                'oss_object_key': oss_object_key,
+                'save_oss_timestamp': int(datetime.now().timestamp() * 1000),
+            }
+        raise AssertionError(f'OSS上传失败,请求ID: \n{response.headers["x-oss-request-id"]}')
+
+
+    # Download each video object from OSS to the local video_path
+    @classmethod
+    def get_oss_url(cls, videos, video_path):
+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
+        bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, OSS_BUCKET_NAME)
+        results = []
+        for i in videos:
+            try:
+                # i[2] is the OSS object key; use its last segment as the file name
+                filename = i[2].split("/")[-1]
+                bucket.get_object_to_file(i[2], f'{video_path}{filename}.mp4')
+                results.append([i[0], i[1], i[2], f'{video_path}{filename}.mp4'])
+            except Exception:
+                continue
+        return results
+
+
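A minimal call sketch for the crawler upload path; the source URL, ids, and bucket path below are hypothetical placeholders:

    from common.aliyun_oss import Oss

    result = Oss.video_sync_upload_oss(
        src_url="https://example.com/video.mp4",  # hypothetical source URL
        video_id="12345",
        account_id="acct_001",
        oss_bucket_path="douyin",
        referer="https://example.com/",
    )
    print(result["status"], result["oss_object_key"])  # e.g. 200 douyin/acct_001/12345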

+ 48 - 0
common/common_log.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/26
+"""
+公共方法,包含:生成log / 删除log / 下载方法 / 删除 weixinzhishu_chlsfiles / 过滤词库 / 保存视频信息至本地 txt / 翻译 / ffmpeg
+"""
+import os
+import sys
+
+sys.path.append(os.getcwd())
+from datetime import date, timedelta
+from datetime import datetime
+from loguru import logger
+
+proxies = {"http": None, "https": None}
+
+
+class Common:
+    # Current time, e.g. <class 'datetime.datetime'> 2022-04-14 20:13:51.244472
+    now = datetime.now()
+    # Yesterday, e.g. <class 'str'> 2022-04-13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+    # Today, e.g. <class 'datetime.date'> 2022-04-14
+    today = date.today()
+    # Tomorrow, e.g. <class 'str'> 2022-04-15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    # Build a logger with loguru
+    @staticmethod
+    def logger(log_type):
+        """
+        Build a per-task logger with loguru
+        """
+        # Log directory, e.g. ./video/logs/
+        log_dir = f"./{log_type}/logs/"
+        if not os.path.isdir(log_dir):
+            os.makedirs(log_dir)
+        # Log file name, e.g. video-2023-12-26.log
+        log_name = f"{log_type}-{datetime.now().strftime('%Y-%m-%d')}.log"
+
+        # Do not print logs to the console
+        logger.remove(handler_id=None)
+        # Write to file; rotate at midnight, keep 10 days, thread-safe enqueue
+        logger.add(os.path.join(log_dir, log_name), level="INFO", rotation="00:00", retention="10 days", enqueue=True)
+
+        return logger
+
+
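A short usage sketch; the "demo" log type is a hypothetical example, and paths are relative to the working directory:

    from common.common_log import Common

    log = Common.logger("demo")  # file: ./demo/logs/demo-2023-12-26.log
    log.info("pipeline started, yesterday={}", Common.yesterday)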

+ 32 - 0
common/feishu_form.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+
+
+sys.path.append(os.getcwd())
+from common.feishu_utils import Feishu
+
+
+class Material:
+    """
+    Get the unprocessed video IDs from the "summary" Feishu sheet
+    """
+    @classmethod
+    def video_list(cls):
+        summary = Feishu.get_values_batch("summary", "f7f695")
+        video_ids = []
+        for row in summary[1:]:
+            video_id = row[0]
+            # Stop at the first empty cell in column A
+            if not video_id:
+                break
+            video_ids.append(str(video_id))
+        return video_ids
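What video_list returns, assuming column A of the summary sheet holds ids (the values shown are hypothetical):

    from common.feishu_form import Material

    ids = Material.video_list()  # e.g. ["41000001", "41000002"]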

+ 368 - 0
common/feishu_utils.py

@@ -0,0 +1,368 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/26
+"""
+飞书表配置: token 鉴权 / 增删改查 / 机器人报警
+"""
+import json
+import os
+import sys
+import requests
+import urllib3
+
+sys.path.append(os.getcwd())
+from common.common_log import Common
+
+proxies = {"http": None, "https": None}
+
+
+class Feishu:
+    """
+    编辑飞书云文档
+    """
+    succinct_url = "https://w42nne6hzg.feishu.cn/sheets/"
+    # 飞书路径token
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        if crawler == "summary":
+            return "MTVXsuGKUhOI5btTjOEceyJ9nZb"
+        else:
+            return crawler
+
+
+    # Get the Feishu API token
+    @classmethod
+    def get_token(cls):
+        """
+        Get the Feishu tenant access token
+        :return:
+        """
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+
+        try:
+            urllib3.disable_warnings()
+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+            tenant_access_token = response.json()["tenant_access_token"]
+            return tenant_access_token
+        except Exception as e:
+            Common.logger("feishu").error("获取飞书 api token 异常:{}", e)
+
+    # Get spreadsheet metadata
+    @classmethod
+    def get_metainfo(cls, crawler):
+        """
+        Get spreadsheet metadata
+        :return:
+        """
+        try:
+            get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/metainfo"
+
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+                "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            return response
+        except Exception as e:
+            Common.logger("feishu").error("获取表格元数据异常:{}", e)
+
+    # Read all values from a worksheet
+    @classmethod
+    def get_values_batch(cls, crawler, sheetid):
+        """
+        Read all values from a worksheet
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :return: all values
+        """
+        try:
+            get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                   + cls.spreadsheettoken(crawler) + "/values_batch_get"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "ranges": sheetid,
+                "valueRenderOption": "ToString",
+                "dateTimeRenderOption": "",
+                "user_id_type": "open_id"
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            values = response["data"]["valueRanges"][0]["values"]
+            return values
+        except Exception as e:
+            Common.logger("feishu").error("读取工作表所有数据异常:{}", e)
+
+    # Insert rows or columns into a worksheet
+    @classmethod
+    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
+        """
+        Insert rows or columns into a worksheet
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :param majordimension: ROWS or COLUMNS
+        :param startindex: start position
+        :param endindex: end position
+        """
+        try:
+            insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                 + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                    "startIndex": startindex,  # 开始的位置
+                    "endIndex": endindex  # 结束的位置
+                },
+                "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+            }
+
+            urllib3.disable_warnings()
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger("feishu").info("插入行或列:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger("feishu").error("插入行或列异常:{}", e)
+
+    # Write values
+    @classmethod
+    def update_values(cls, crawler, sheetid, ranges, values):
+        """
+        Write values
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :param ranges: cell range
+        :param values: values to write, a list
+        """
+        try:
+            update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                + cls.spreadsheettoken(crawler) + "/values_batch_update"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "valueRanges": [
+                    {
+                        "range": sheetid + "!" + ranges,
+                        "values": values
+                    },
+                ],
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger("feishu").info("写入数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger("feishu").error("写入数据异常:{}", e)
+
+    # Merge cells
+    @classmethod
+    def merge_cells(cls, crawler, sheetid, ranges):
+        """
+        Merge cells
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :param ranges: cell range to merge
+        """
+        try:
+            merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/merge_cells"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+
+            body = {
+                "range": sheetid + "!" + ranges,
+                "mergeType": "MERGE_ROWS"
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger("feishu").info("合并单元格:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger("feishu").error("合并单元格异常:{}", e)
+
+    # Read a single cell
+    @classmethod
+    def get_range_value(cls, crawler, sheetid, cell):
+        """
+        Read the content of a cell
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :param cell: which cell
+        :return: cell content
+        """
+        try:
+            get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "valueRenderOption": "FormattedValue",
+
+                # dateTimeRenderOption=FormattedString formats dates/times (but not numbers) and returns the formatted string
+                "dateTimeRenderOption": "",
+
+                # user id type to return: open_id or union_id
+                "user_id_type": "open_id"
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            return r.json()["data"]["valueRange"]["values"][0]
+        except Exception as e:
+            Common.logger("feishu").error("读取单元格数据异常:{}", e)
+
+    # Get sheet content as a flat list
+    @classmethod
+    def get_sheet_content(cls, crawler, sheet_id):
+        try:
+            sheet = Feishu.get_values_batch(crawler, sheet_id)
+            content_list = []
+            for x in sheet:
+                for y in x:
+                    if y is not None:
+                        content_list.append(y)
+            return content_list
+        except Exception as e:
+            Common.logger("feishu").error(f'get_sheet_content:{e}\n')
+
+    # Delete rows or columns; ROWS or COLUMNS
+    @classmethod
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
+        """
+        Delete rows or columns
+        :param log_type: log path
+        :param crawler: which crawler's document
+        :param sheetid: which sheet
+        :param major_dimension: defaults to ROWS; ROWS or COLUMNS
+        :param startindex: start position
+        :param endindex: end position
+        :return:
+        """
+        try:
+            dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": major_dimension,
+                    "startIndex": startindex,
+                    "endIndex": endindex
+                }
+            }
+            urllib3.disable_warnings()
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger("feishu").info("删除视频数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger("feishu").error("删除视频数据异常:{}", e)
+
+    # Get a user's open_id
+    @classmethod
+    def get_userid(cls, username):
+        try:
+            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            name_phone_dict = {
+                "xinxin": "15546206651",
+                "muxinyi": "13699208058",
+                "wangxueke": "13513479926",
+                "yuzhuoyi": "18624010360",
+                "luojunhui": "18801281360",
+                "fanjun": "15200827642",
+                "zhangyong": "17600025055"
+            }
+            # Map the name to a phone number; the API looks users up by mobile
+            username = name_phone_dict.get(username)
+
+            data = {"mobiles": [username]}
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
+            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
+
+            return open_id
+        except Exception as e:
+            Common.logger("feishu").error(f"get_userid异常:{e}\n")
+
+    # Feishu bot alert
+    @classmethod
+    def bot(cls, log_type, crawler, text, mark_name):
+        try:
+            url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
+            headers = {'Content-Type': 'application/json'}
+            # Defaults so the card still renders for other channels
+            sheet_url = cls.succinct_url
+            users = ""
+            if crawler == "机器自动改造消息通知":
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/S8jusgF83h8gEKtILW4cli2Bngf?sheet=3e1295"
+                users = "<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
+
+            data = json.dumps({
+                "msg_type": "interactive",
+                "card": {
+                    "config": {
+                        "wide_screen_mode": True,
+                        "enable_forward": True
+                    },
+                    "elements": [{
+                        "tag": "div",
+                        "text": {
+                            "content": users + text,
+                            "tag": "lark_md"
+                        }
+                    }, {
+                        "actions": [{
+                            "tag": "button",
+                            "text": {
+                                "content": "详情,点击~~~~~",
+                                "tag": "lark_md"
+                            },
+                            "url": sheet_url,
+                            "type": "default",
+                            "value": {}
+                        }],
+                        "tag": "action"
+                    }],
+                    "header": {
+                        "title": {
+                            "content": "📣消息来喽~~~",
+                            "tag": "plain_text"
+                        }
+                    }
+                }
+            })
+            urllib3.disable_warnings()
+            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
+            Common.logger("feishu").info(f'触发机器人消息:{r.status_code}, {text}')
+        except Exception as e:
+            Common.logger("feishu").error(f"bot异常:{e}\n")
+
+
+if __name__ == "__main__":
+    # mark_name is required as the fourth argument
+    Feishu.bot('recommend', '抖音', '测试: 抖音cookie失效,请及时更换', 'zhangyong')
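The prepend-then-write pattern video_prep.py uses against this API: insert a blank row 2 into tab JmS9sJ, then write into A2:Z2 (the ids below are hypothetical):

    from common.feishu_utils import Feishu

    values = [["41000001", "87654321"]]  # [source video id, new video id]
    Feishu.insert_columns("MTVXsuGKUhOI5btTjOEceyJ9nZb", "JmS9sJ", "ROWS", 1, 2)
    Feishu.update_values("MTVXsuGKUhOI5btTjOEceyJ9nZb", "JmS9sJ", "A2:Z2", values)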
+

+ 42 - 0
common/ffmpeg.py

@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+import subprocess
+
+
+class FFmpeg:
+
+    """
+    获取单个视频时长
+    """
+    @classmethod
+    def get_video_duration(cls, video_url):
+        ffprobe_cmd = [
+            "ffprobe",
+            "-i", video_url,
+            "-show_entries", "format=duration",
+            "-v", "quiet",
+            "-of", "csv=p=0"
+        ]
+        output = subprocess.check_output(ffprobe_cmd).decode("utf-8").strip()
+        return float(output)
+
+    """
+    视频更换分辨率
+    """
+    @classmethod
+    def video_duration(cls, video_url, video_path, video_id):
+        duration_url = video_path + str(video_id) + 'duration.mp4'
+        # Get the source duration and cap it at 300 seconds
+        total_duration = cls.get_video_duration(video_url)
+        if int(total_duration) > 300:
+            total_duration = 300
+        ffmpeg_cmd = [
+            "ffmpeg",
+            "-i", video_url,
+            "-c:v", "libx264",
+            "-c:a", "aac",
+            "-t", str(total_duration),
+            "-vf", "scale=720:1280",  # the scale filter takes w:h; "720x1280" fails to parse
+            "-y",
+            duration_url
+        ]
+        subprocess.run(ffmpeg_cmd)
+        return duration_url
+
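A quick sketch of the two-step flow (probe with ffprobe, then re-encode); the paths and id are hypothetical:

    from common.ffmpeg import FFmpeg

    seconds = FFmpeg.get_video_duration("/tmp/in.mp4")       # float, via ffprobe CSV output
    out = FFmpeg.video_duration("/tmp/in.mp4", "/tmp/", 42)  # -> /tmp/42duration.mp4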

+ 66 - 0
common/mysql_db.py

@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+数据库连接及操作
+"""
+import pymysql
+
+class MysqlHelper:
+    @classmethod
+    def connect_mysql(cls):
+        # Create a Connection object representing one database connection
+        connection = pymysql.connect(
+            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host, internal address
+            # host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database host, public address
+            port=3306,  # port
+            user="crawler",  # MySQL user name
+            passwd="crawler123456@",  # MySQL password
+            db="piaoquan-crawler",  # database name
+            # charset must match the table encoding (utf8 here)
+            charset="utf8")
+        return connection
+
+    @classmethod
+    def get_values(cls, sql):
+        try:
+            # Connect to the database
+            connect = cls.connect_mysql()
+            # Get a Cursor object
+            mysql = connect.cursor()
+
+            # Execute the SQL statement
+            mysql.execute(sql)
+
+            # fetchall returns a tuple of tuples, one per row
+            data = mysql.fetchall()
+
+            # Close the connection
+            connect.close()
+
+            # Return the query result
+            return data
+        except Exception as e:
+            print(f"get_values异常:{e}\n")
+
+    @classmethod
+    def update_values(cls, sql):
+        # Connect to the database
+        connect = cls.connect_mysql()
+        # Get a Cursor object
+        mysql = connect.cursor()
+        try:
+            # Execute the SQL statement
+            res = mysql.execute(sql)
+            # commit is required, otherwise the write is not persisted
+            connect.commit()
+            return res
+        except Exception:
+            # Roll back on error
+            connect.rollback()
+        # Close the connection
+        connect.close()
+
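Both helpers execute fully-formed SQL strings; a hedged sketch of a parameterized variant (not part of this commit) that lets pymysql escape the values:

    @classmethod
    def get_values_safe(cls, sql, params=None):
        # Hypothetical helper: pass parameters separately instead of interpolating f-strings
        connect = cls.connect_mysql()
        cursor = connect.cursor()
        cursor.execute(sql, params or ())
        data = cursor.fetchall()
        connect.close()
        return data

    # usage: get_values_safe("SELECT video_id FROM guanggao_video WHERE video_id = %s", ("123",))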

+ 285 - 0
common/piaoquan_utils.py

@@ -0,0 +1,285 @@
+import random
+import time
+import json
+import requests
+from urllib.parse import urlencode
+
+from common import Common
+from common.sql_help import sqlCollect
+
+
+class PQ:
+
+    """
+    获取视频链接
+    """
+    @classmethod
+    def get_pw_url(cls, user_id):
+        url = f"https://admin.piaoquantv.com/manager/video/detail/{user_id}"
+        payload = {}
+        headers = {
+            'authority': 'admin.piaoquantv.com',
+            'accept': 'application/json, text/plain, */*',
+            'accept-language': 'zh-CN,zh;q=0.9',
+            'cache-control': 'no-cache',
+            'cookie': 'SESSION=YjU3MzgwNTMtM2QyYi00YjljLWI3YWUtZTBjNWYwMGQzYWNl',
+            'pragma': 'no-cache',
+            'referer': f'https://admin.piaoquantv.com/cms/post-detail/{user_id}/detail',
+            'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-site': 'same-origin',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        }
+
+        response = requests.request("GET", url, headers=headers, data=payload)
+        data = response.json()
+        try:
+            video_url = data["content"]["transedVideoPath"]
+            new_title = data["content"]["title"]
+            return video_url, new_title
+        except Exception as e:
+            Common.logger("video").warning(f"获取视频链接失败:{e}\n")
+            # Return a pair so callers can always unpack two values
+            return "", ""
+
+    """
+     获取视频链接
+    """
+    @classmethod
+    def get_audio_url(cls, task_mark, user_id, title, mark):
+        url = f"https://admin.piaoquantv.com/manager/video/detail/{user_id}"
+        payload = {}
+        headers = {
+            'authority': 'admin.piaoquantv.com',
+            'accept': 'application/json, text/plain, */*',
+            'accept-language': 'zh-CN,zh;q=0.9',
+            'cache-control': 'no-cache',
+            'cookie': 'SESSION=YjU3MzgwNTMtM2QyYi00YjljLWI3YWUtZTBjNWYwMGQzYWNl',
+            'pragma': 'no-cache',
+            'referer': f'https://admin.piaoquantv.com/cms/post-detail/{user_id}/detail',
+            'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-site': 'same-origin',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        }
+
+        response = requests.request("GET", url, headers=headers, data=payload)
+        data = response.json()
+        try:
+            data_list = []
+            video_id = data["content"]["id"]
+            if mark:
+                # sqlCollect.is_used is expected here; it is not defined in this commit's sql_help.py
+                status = sqlCollect.is_used(task_mark, video_id, mark)
+            else:
+                status = True
+            if status:
+                if not title:
+                    new_title = data["content"]["title"]
+                else:
+                    if '/' in title:
+                        titles = [t for t in title.split('/') if t and t != "None"]
+                    else:
+                        titles = [title]
+                    new_title = random.choice(titles)
+                video_url = data["content"]["transedVideoPath"]
+                cover = data["content"]["coverImgPath"]
+                all_data = {"video_id": video_id, "title": new_title, "cover": cover, "video_url": video_url}
+                data_list.append(all_data)
+            return data_list
+        except Exception as e:
+            Common.logger("video").warning(f"获取视频链接失败:{e}\n")
+            return []
+
+    """
+    获取用户下的所有视频
+    """
+    @classmethod
+    def get_user_url(cls, task_mark, user_id, number, title, mark):
+        url = f"https://admin.piaoquantv.com/manager/video/page?uid={user_id}&pageNum=1&pageSize=100"
+
+        payload = {}
+        headers = {
+            'accept': 'application/json, text/plain, */*',
+            'cookie': 'SESSION=NjRmMGVjNTAtNzJiNi00ODE0LThjYzQtYmZiNTJhMDNiZTcz',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
+        }
+
+        response = requests.request("GET", url, headers=headers, data=payload)
+        data = response.json()
+        try:
+            content = data["content"]["objs"]
+            data_list = []
+            for url in content:
+                video_id = url["id"]
+                status = sqlCollect.is_used(task_mark, video_id, mark)
+                if status:
+                    if not title:
+                        new_title = url["title"]
+                    else:
+                        if '/' in title:
+                            titles = title.split('/')
+                        else:
+                            titles = [title]
+                        new_title = random.choice(titles)
+                    cover = url["coverImgPath"]
+                    video_url = url["transedVideoPath"]
+                    all_data = {"video_id": video_id, "title": new_title, "cover": cover, "video_url": video_url}
+                    data_list.append(all_data)
+                    if len(data_list) == int(number):
+                        Common.logger("log").info(f"获取视频总数:{len(data_list)}\n")
+                        return data_list
+            Common.logger("log").info(f"获取视频总数:{len(data_list)}\n")
+            return data_list
+        except Exception as e:
+            Common.logger("log").warning(f"获取音频视频链接失败:{e}\n")
+            return []
+
+    """
+    获取封面
+    """
+    @classmethod
+    def get_cover(cls, uid):
+        time.sleep(1)
+        url = "https://admin.piaoquantv.com/manager/video/multiCover/listV2"
+
+        payload = json.dumps({
+            "videoId": uid,
+            "range": "2h"
+        })
+        headers = {
+            'accept': 'application/json',
+            'accept-language': 'zh-CN,zh;q=0.9',
+            'cache-control': 'no-cache',
+            'content-type': 'application/json',
+            'cookie': 'SESSION=YjU3MzgwNTMtM2QyYi00YjljLWI3YWUtZTBjNWYwMGQzYWNl',
+            'origin': 'https://admin.piaoquantv.com',
+            'pragma': 'no-cache',
+            'priority': 'u=1, i',
+            'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        data = response.json()
+        content = data["content"]
+        if len(content) == 1:
+            return content[0]["coverUrl"]
+        max_share_count = 0
+        selected_cover_url = ""
+        for item in content:
+            share_count = item.get("shareWeight")
+            if share_count is not None and share_count > max_share_count:
+                max_share_count = share_count
+                selected_cover_url = item["coverUrl"]
+            elif share_count == max_share_count and item["createUser"] == "用户":
+                selected_cover_url = item["coverUrl"]
+        return selected_cover_url
+
+    """
+    获取标题
+    """
+    @classmethod
+    def get_title(cls, uid):
+        url = "https://admin.piaoquantv.com/manager/video/multiTitleV2/listV2"
+
+        payload = json.dumps({
+            "videoId": uid,
+            "range": "4h"
+        })
+        headers = {
+            'accept': 'application/json',
+            'accept-language': 'zh-CN,zh;q=0.9',
+            'cache-control': 'no-cache',
+            'content-type': 'application/json',
+            'cookie': 'SESSION=YjU3MzgwNTMtM2QyYi00YjljLWI3YWUtZTBjNWYwMGQzYWNl',
+            'origin': 'https://admin.piaoquantv.com',
+            'pragma': 'no-cache',
+            'priority': 'u=1, i',
+            'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
+        }
+        response = requests.request("POST", url, headers=headers, data=payload)
+        data = response.json()
+        content = data["content"]
+        if len(content) == 1:
+            return content[0]["title"]
+        max_share_count = 0
+        selected_title = ""
+        for item in content:
+            share_count = item.get("shareWeight")
+            if share_count is not None and share_count > max_share_count:
+                max_share_count = share_count
+                selected_title = item["title"]
+            elif share_count == max_share_count and item["createUser"] == "用户":
+                selected_title = item["title"]
+        return selected_title
+
+    """
+    新生成视频上传到对应账号下
+    """
+    @classmethod
+    def insert_piaoquantv(cls, new_video_path, new_title):
+
+        url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
+        headers = {
+            'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
+            'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
+            'referer': 'http://appspeed.piaoquantv.com',
+            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
+            'accept-language': 'zh-CN,zh-Hans;q=0.9',
+            'Content-Type': 'application/x-www-form-urlencoded'
+        }
+        payload = {
+            # 'coverImgPath': cover,
+            'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
+            'fileExtensions': 'MP4',
+            'loginUid': 70277791,
+            'networkType': 'Wi-Fi',
+            'platform': 'iOS',
+            'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
+            'sessionId': '362290597725ce1fa870d7be4f46dcc2',
+            'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
+            'title': new_title,
+            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
+            'uid': 70277791,
+            'versionCode': '486',
+            'versionName': '3.4.12',
+            'videoFromScene': '1',
+            'videoPath': new_video_path,
+            'viewStatus': '1'
+        }
+        encoded_payload = urlencode(payload)
+        response = requests.request("POST", url, headers=headers, data=encoded_payload)
+        data = response.json()
+        code = data["code"]
+        if code == 0:
+            new_video_id = data["data"]["id"]
+            return new_video_id
+        else:
+            return ''
+
+
+    """
+    票圈站内视频下载
+    """
+    @classmethod
+    def download_video(cls, video_url, video_path_url, video_id):
+        # Retry up to three times
+        for i in range(3):
+            payload = {}
+            headers = {}
+            response = requests.request("GET", video_url, headers=headers, data=payload)
+            if response.status_code == 200:
+                video = video_path_url + str(video_id) + '.mp4'
+                # Open the file in binary write mode and write the response body
+                with open(video, "wb") as file:
+                    file.write(response.content)
+                time.sleep(5)
+                return video
+        return ''
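A minimal sketch of the download path as video_prep.py drives it; the id and directory are placeholders:

    from common.piaoquan_utils import PQ

    video_url, title = PQ.get_pw_url("41000001")                      # hypothetical video id
    local = PQ.download_video(video_url, "/tmp/videos/", "41000001")  # '' on failure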

+ 106 - 0
common/scheduling_db.py

@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/26
+"""
+数据库连接及操作
+"""
+import pymysql
+from common.common_log import Common
+
+
+class MysqlHelper:
+    @classmethod
+    def connect_mysql(cls, env, action):
+        if env == 'hk':
+            if action == 'get_author_map':
+                # Create a Connection object representing one database connection
+                connection = pymysql.connect(
+                    host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database host, public address
+                    port=3306,  # port
+                    user="crawler",  # MySQL user name
+                    passwd="crawler123456@",  # MySQL password
+                    db="piaoquan-crawler",  # database name
+                    # charset must match the table encoding
+                    charset="utf8mb4")
+            else:
+                # Create a Connection object representing one database connection
+                connection = pymysql.connect(
+                    host="rm-j6cz4c6pt96000xi3.mysql.rds.aliyuncs.com",  # database host, internal address
+                    # host="rm-j6cz4c6pt96000xi3lo.mysql.rds.aliyuncs.com",  # database host, public address
+                    port=3306,  # port
+                    user="crawler",  # MySQL user name
+                    passwd="crawler123456@",  # MySQL password
+                    db="piaoquan-crawler",  # database name
+                    # charset must match the table encoding
+                    charset="utf8mb4")
+        elif env == 'prod':
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host, internal address
+                # host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # MySQL user name
+                passwd="crawler123456@",  # MySQL password
+                db="piaoquan-crawler",  # database name
+                # charset must match the table encoding
+                charset="utf8mb4")
+        else:
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host, internal address
+                # host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # MySQL user name
+                passwd="crawler123456@",  # MySQL password
+                db="piaoquan-crawler",  # database name
+                # charset must match the table encoding
+                charset="utf8mb4")
+
+        return connection
+
+    @classmethod
+    def get_values(cls, log_type, crawler, sql, env, action=''):
+        try:
+            # Connect to the database
+            connect = cls.connect_mysql(env, action)
+            # Get a Cursor that returns rows as dicts
+            mysql = connect.cursor(cursor=pymysql.cursors.DictCursor)
+
+            # Execute the SQL statement
+            mysql.execute(sql)
+
+            # With DictCursor, fetchall returns a tuple of dicts, one per row
+            data = mysql.fetchall()
+
+            # Close the connection
+            connect.close()
+
+            # Return the query result
+            return data
+        except Exception as e:
+            Common.logger(log_type).error(f"get_values异常:{e}\n")
+
+    @classmethod
+    def update_values(cls, log_type, crawler, sql, env, action=''):
+        # Connect to the database
+        connect = cls.connect_mysql(env, action)
+        # Get a Cursor object
+        mysql = connect.cursor()
+
+        try:
+            # Execute the SQL statement
+            res = mysql.execute(sql)
+            # commit is required, otherwise the write is not persisted
+            connect.commit()
+            return res
+        except Exception as e:
+            Common.logger(log_type).error(f"update_values异常,进行回滚操作:{e}\n")
+            # Roll back on error
+            connect.rollback()
+
+        # Close the connection
+        connect.close()
+
+
+if __name__ == "__main__":
+    pass
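Both this module and mysql_db.py define a MysqlHelper; this env-aware variant returns dict rows. A hypothetical call:

    from common.scheduling_db import MysqlHelper

    rows = MysqlHelper.get_values("log", "crawler", "SELECT video_id FROM guanggao_video LIMIT 10", "prod")
    # rows is a tuple of dicts, e.g. ({'video_id': '41000001'},)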

+ 28 - 0
common/sql_help.py

@@ -0,0 +1,28 @@
+import os
+import sys
+sys.path.append(os.getcwd())
+from common import MysqlHelper
+
+
+class sqlCollect():
+    """
+    Write video info to the database
+    """
+    @classmethod
+    def insert_task(cls, video_id):
+        # video_id is interpolated directly into the SQL; pass trusted ids only
+        insert_sql = f"""INSERT INTO guanggao_video (video_id) values ('{video_id}')"""
+        MysqlHelper.update_values(
+            sql=insert_sql
+        )
+
+    """
+    Return the video IDs that have already been used
+    """
+    @classmethod
+    def is_video_id(cls):
+        sql = """
+            SELECT video_id FROM guanggao_video
+        """
+        data = MysqlHelper.get_values(sql)
+        return data
+
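Usage as wired in video_prep.py (the ids are placeholders):

    from common.sql_help import sqlCollect

    used = sqlCollect.is_video_id()     # tuple of 1-tuples, e.g. (('41000001',),)
    sqlCollect.insert_task("41000002")  # mark an id as processed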

+ 5 - 0
config.ini

@@ -0,0 +1,5 @@
+[PATHS]
+VIDEO_PATH = /Users/tzld/Desktop/guanggao_video/path/
+;VIDEO_PATH = /root/video_rewriting/path/
+
+

+ 25 - 0
video_job.py

@@ -0,0 +1,25 @@
+import time
+import schedule
+
+from common import Material
+from video_rewriting.video_prep import getVideo
+
+
+# Scheduled mode: run the pipeline daily at 01:00
+# def video_start():
+#     print("开始执行")
+#     data = Material.video_list()
+#     getVideo.video_task(data)
+#     print("执行完成")
+#
+#
+# schedule.every().day.at("01:00").do(video_start)
+#
+# while True:
+#     schedule.run_pending()
+#     time.sleep(1)
+
+
+# One-shot mode: fetch the id list from Feishu and process it immediately
+data = Material.video_list()
+getVideo.video_task(data)

+ 0 - 0
video_rewriting/__init__.py


+ 106 - 0
video_rewriting/video_prep.py

@@ -0,0 +1,106 @@
+import configparser
+import os
+import time
+
+from common import Feishu, Common, Oss
+from common.ffmpeg import FFmpeg
+from common.piaoquan_utils import PQ
+from common.sql_help import sqlCollect
+
+config = configparser.ConfigParser()
+config.read('./config.ini')
+
+
+class getVideo:
+    """
+    根据标示+任务标示创建目录
+    """
+    @classmethod
+    def create_folders(cls):
+        video_path_url = config['PATHS']['VIDEO_PATH']
+        if not os.path.exists(video_path_url):
+            os.makedirs(video_path_url)
+        return video_path_url
+
+
+    """
+    删除文件
+    """
+    @classmethod
+    def remove_files(cls, video_path_url):
+        if os.path.exists(video_path_url) and os.path.isdir(video_path_url):
+            # Walk bottom-up so each directory is empty before rmdir
+            for root, dirs, files in os.walk(video_path_url, topdown=False):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    os.remove(file_path)
+                for dir in dirs:
+                    dir_path = os.path.join(root, dir)
+                    os.rmdir(dir_path)
+
+    """
+    获取未改造的视频ID
+    """
+    @classmethod
+    def find_unique_id(cls, data, videos):
+        video_ids = [item for item in data if item not in videos]
+        data_video_ids = [item for item in video_ids if item is not None and item != '']
+        return data_video_ids
+
+    """
+    飞书数据处理
+    """
+    @classmethod
+    def video_task(cls, data):
+        video_list = sqlCollect.is_video_id()  # ids already processed
+        if video_list:
+            videos = [item[0].replace("'", "").replace("(", "").replace(")", "") for item in video_list]
+        else:
+            videos = []
+        data_video_ids = cls.find_unique_id(data, videos)  # ids not yet processed
+        video_path_url = cls.create_folders()  # create the working directory
+        if data_video_ids:
+            for v_id in data_video_ids:
+                try:
+                    video_url, new_title = PQ.get_pw_url(v_id)
+                    if not video_url:
+                        # get_pw_url returns empty strings on failure
+                        continue
+                    new_video_path = PQ.download_video(video_url, video_path_url, v_id)  # download the video locally
+                    if new_video_path == '':
+                        Common.logger("log").info(f"{video_url}视频下载失败")
+                        cls.remove_files(video_path_url)
+                        continue
+                    url = FFmpeg.video_duration(new_video_path, video_path_url, v_id)
+                    if not os.path.isfile(url):
+                        Common.logger("log").info(f"{v_id}下的视频处理失败")
+                        cls.remove_files(video_path_url)
+                        continue
+                    oss_object_key = Oss.stitching_sync_upload_oss(url, v_id)  # upload the video to OSS
+                    time.sleep(1)
+                    status = oss_object_key.get("status")
+                    if status == 200:
+                        oss_object_key = oss_object_key.get("oss_object_key")
+                        time.sleep(1)
+                        code = PQ.insert_piaoquantv(oss_object_key, new_title)
+                        if code:
+                            Common.logger("log").info(f"{v_id}发送成功")
+                            sqlCollect.insert_task(v_id)  # record the id in the database
+                            time.sleep(1)
+                            values = [[v_id, str(code)]]
+                            # Prepend a row to the JmS9sJ tab, then write [video_id, new_video_id]
+                            Feishu.insert_columns("MTVXsuGKUhOI5btTjOEceyJ9nZb", "JmS9sJ", "ROWS", 1, 2)
+                            time.sleep(0.5)
+                            Feishu.update_values("MTVXsuGKUhOI5btTjOEceyJ9nZb", "JmS9sJ", "A2:Z2", values)
+                        cls.remove_files(video_path_url)
+                    else:
+                        cls.remove_files(video_path_url)
+                        Common.logger("log").info(f"视频ID{v_id} 视频发送OSS失败 ")
+                    time.sleep(10)
+                except Exception as e:
+                    cls.remove_files(video_path_url)
+                    Common.logger("log").info(f"处理失败,错误信息{e}")
+                    continue