zhangyong 3 月之前
父节点
当前提交
a80fa84d0b
共有 13 个文件被更改,包括 999 次插入,0 次删除
  1. 14 0
      Dockerfile
  2. 31 0
      docker-compose.yml
  3. 18 0
      requirements.txt
  4. 0 0
      utils/__init__.py
  5. 411 0
      utils/feishu_utils.py
  6. 95 0
      utils/google_ai_studio.py
  7. 80 0
      utils/gpt4o_mimi.py
  8. 37 0
      utils/odps_data.py
  9. 131 0
      utils/piaoquan.py
  10. 31 0
      utils/redis.py
  11. 0 0
      workers/__init__.py
  12. 97 0
      workers/consumption_work.py
  13. 54 0
      workers/select_work.py

+ 14 - 0
Dockerfile

@@ -0,0 +1,14 @@
FROM python:3.11-slim

WORKDIR /app

COPY . .

ENV TZ=Asia/Shanghai

# Use apt-get consistently: the original mixed `apt` (whose CLI is not
# stable for scripting and prints a warning) with `apt-get`. Install,
# clean the package lists and install Python deps in one layer to keep
# the image small; /app/cache is the runtime download cache.
RUN apt-get update && apt-get install -y --no-install-recommends libgl-dev libglib2.0-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/* \
    && pip install -r requirements.txt --no-cache-dir \
    && mkdir -p /app/cache

# Entrypoint is supplied per-service by docker-compose.yml.
#ENTRYPOINT ["python", "/app/job.py"]

+ 31 - 0
docker-compose.yml

@@ -0,0 +1,31 @@
# NOTE(review): the API keys below are committed in plain text -- rotate them
# and inject them via an .env file or a secrets mechanism instead.
services:
  worker1:
    build:
      context: .
      dockerfile: Dockerfile
    image: supply_job
    container_name: supply_worker1
    restart: unless-stopped
    environment:
      - ENV=prod
      - API_KEY=AIzaSyB2kjF2-S2B5cJiosx_LpApd227w33CVvs
      - TASK_TYPE=recommend
    networks:
      - google_net
    entrypoint: "python /app/workers/select_work.py"
  worker2:
    depends_on:
      - worker1
    image: supply_job
    # Fixed: was "supply_worker7", inconsistent with the service name worker2.
    container_name: supply_worker2
    restart: unless-stopped
    environment:
      - ENV=prod
      - API_KEY=AIzaSyDXeugvEaYpKNrLPavMU1U5GtRhSaNLpAc
      - TASK_TYPE=recommend
    networks:
      - google_net
    entrypoint: "python /app/workers/consumption_work.py"
networks:
  google_net:
    name: google_net

+ 18 - 0
requirements.txt

@@ -0,0 +1,18 @@
# Deduplicated: `redis` was pinned twice with conflicting versions (5.1.1 and
# 5.2.1) and `requests` was pinned twice; conflicting duplicate pins make the
# install ambiguous (and error under modern pip). The newer redis pin is kept.
# Sorted alphabetically for easier diffing.
aliyun-log-python-sdk==0.9.12
apscheduler==3.11.0
google-generativeai==0.8.3
lark-oapi==1.4.8
loguru==0.7.2
odps==3.5.1
opencv-python==4.10.0.84
orjson==3.10.13
oss2==2.19.1
protobuf==5.26.1
pydantic==2.10.4
PyMySQL==1.1.1
redis==5.2.1
requests==2.32.3
schedule==1.2.2
yarl==1.18.3

+ 0 - 0
utils/__init__.py


+ 411 - 0
utils/feishu_utils.py

@@ -0,0 +1,411 @@
+# -*- coding: utf-8 -*-
+"""
+飞书表配置: token 鉴权 / 增删改查 / 机器人报警
+"""
+import json
+import os
+import sys
+import requests
+import urllib3
+from loguru import logger
+
+sys.path.append(os.getcwd())
+
+proxies = {"http": None, "https": None}
+
+
class Feishu:
    """
    Helper for Feishu (Lark) spreadsheet documents: API token auth, range
    read/write/insert/merge/delete, user lookup and webhook bot alerts.

    NOTE(review): app credentials, user phone numbers and webhook URLs are
    hard-coded below -- consider loading them from configuration instead.
    """
    succinct_url = "https://w42nne6hzg.feishu.cn/sheets/"
    # Resolve a logical document name to its Feishu spreadsheet token.
    @classmethod
    def spreadsheettoken(cls, crawler):
        # "summary" is an alias for the shared summary spreadsheet; any other
        # value is assumed to already be a raw spreadsheet token.
        if crawler == "summary":
            return "KsoMsyP2ghleM9tzBfmcEEXBnXg"
        else:
            return crawler



    # Obtain a Feishu API tenant access token.
    @classmethod
    def get_token(cls):
        """
        Fetch a tenant_access_token used to authenticate subsequent calls.
        :return: token string, or None on failure (the error is logged)
        """
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        post_data = {"app_id": "cli_a13ad2afa438d00b",  # backend account credentials of the published app
                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}

        try:
            urllib3.disable_warnings()
            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
            tenant_access_token = response.json()["tenant_access_token"]
            return tenant_access_token
        except Exception as e:
            logger.error("获取飞书 api token 异常:{}", e)

    # Fetch spreadsheet metadata.
    @classmethod
    def get_metainfo(cls, crawler):
        """
        Get the spreadsheet's metadata.
        :param crawler: logical document name or spreadsheet token
        :return: decoded JSON response, or None on failure
        """
        try:
            get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                               + cls.spreadsheettoken(crawler) + "/metainfo"

            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            params = {
                "extFields": "protectedRange",  # extFields=protectedRange additionally returns protected row/column info
                "user_id_type": "open_id"  # type of user ids in the response: open_id or union_id
            }
            urllib3.disable_warnings()
            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            return response
        except Exception as e:
            logger.error("获取表格元数据异常:{}", e)

    # Read all values of a worksheet.
    @classmethod
    def get_values_batch(cls, crawler, sheetid):
        """
        Read every row of one worksheet.
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :return: list of row value lists, or None on failure
        """
        try:
            get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                   + cls.spreadsheettoken(crawler) + "/values_batch_get"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            params = {
                "ranges": sheetid,
                "valueRenderOption": "ToString",
                "dateTimeRenderOption": "",
                "user_id_type": "open_id"
            }
            urllib3.disable_warnings()
            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            values = response["data"]["valueRanges"][0]["values"]
            return values
        except Exception as e:
            logger.error("读取工作表所有数据异常:{}", e)

    # Insert rows or columns into a worksheet.
    @classmethod
    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
        """
        Insert rows or columns.
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :param majordimension: ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        """
        try:
            insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                 + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            body = {
                "dimension": {
                    "sheetId": sheetid,
                    "majorDimension": majordimension,  # defaults to ROWS; ROWS or COLUMNS
                    "startIndex": startindex,  # start position
                    "endIndex": endindex  # end position
                },
                "inheritStyle": "AFTER"  # BEFORE or AFTER; omit to not inherit style
            }

            urllib3.disable_warnings()
            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
            logger.info("插入行或列:{}", r.json()["msg"])
        except Exception as e:
            logger.error("插入行或列异常:{}", e)

    # Write values into a range.
    @classmethod
    def update_values(cls, crawler, sheetid, ranges, values):
        """
        Write values.
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :param ranges: cell range within the sheet
        :param values: list of rows to write
        """
        try:
            update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                + cls.spreadsheettoken(crawler) + "/values_batch_update"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            body = {
                "valueRanges": [
                    {
                        "range": sheetid + "!" + ranges,
                        "values": values
                    },
                ],
            }
            urllib3.disable_warnings()
            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
            logger.info("写入数据:{}", r.json()["msg"])
        except Exception as e:
            logger.error("写入数据异常:{}", e)

    # Merge cells (row-wise).
    @classmethod
    def merge_cells(cls, crawler, sheetid, ranges):
        """
        Merge a cell range.
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :param ranges: cell range to merge
        """
        try:
            merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                              + cls.spreadsheettoken(crawler) + "/merge_cells"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }

            body = {
                "range": sheetid + "!" + ranges,
                "mergeType": "MERGE_ROWS"
            }
            urllib3.disable_warnings()
            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
            logger.info("合并单元格:{}", r.json()["msg"])
        except Exception as e:
            logger.error("合并单元格异常:{}", e)

    # Read a single cell.
    @classmethod
    def get_range_value(cls, crawler, sheetid, cell):
        """
        Read the content of one cell.
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :param cell: cell reference, e.g. "B3"
        :return: first row of the returned value range, or None on failure
        """
        try:
            get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                  + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            params = {
                "valueRenderOption": "FormattedValue",

                # dateTimeRenderOption=FormattedString formats date/time values (not
                # plain numbers) and returns the formatted string.
                "dateTimeRenderOption": "",

                # type of user ids in the response: open_id or union_id
                "user_id_type": "open_id"
            }
            urllib3.disable_warnings()
            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
            # print(r.text)
            return r.json()["data"]["valueRange"]["values"][0]
        except Exception as e:
            logger.error("读取单元格数据异常:{}", e)
    # Collect all non-empty cells of a sheet into one flat list.
    @classmethod
    def get_sheet_content(cls, crawler, sheet_id):
        try:
            sheet = Feishu.get_values_batch(crawler, sheet_id)
            content_list = []
            for x in sheet:
                for y in x:
                    if y is None:
                        pass
                    else:
                        content_list.append(y)
            return content_list
        except Exception as e:
            logger.error(f'get_sheet_content:{e}\n')

    # Delete rows or columns (ROWS or COLUMNS).
    @classmethod
    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
        """
        Delete rows or columns.
        :param log_type: log path (not used here; kept for caller compatibility)
        :param crawler: logical document name or spreadsheet token
        :param sheetid: worksheet id
        :param major_dimension: defaults to ROWS; ROWS or COLUMNS
        :param startindex: start position
        :param endindex: end position
        :return:
        """
        try:
            dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
                                  + cls.spreadsheettoken(crawler) + "/dimension_range"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            body = {
                "dimension": {
                    "sheetId": sheetid,
                    "majorDimension": major_dimension,
                    "startIndex": startindex,
                    "endIndex": endindex
                }
            }
            urllib3.disable_warnings()
            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
            logger.info("删除视频数据:{}", r.json()["msg"])
        except Exception as e:
            logger.error("删除视频数据异常:{}", e)

    # Look up a user's open_id by username.
    @classmethod
    def get_userid(cls, username):
        try:
            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
            headers = {
                "Authorization": "Bearer " + cls.get_token(),
                "Content-Type": "application/json; charset=utf-8"
            }
            # Known usernames mapped to the phone numbers registered in Feishu.
            name_phone_dict = {
                "xinxin": "15546206651",
                "muxinyi": "13699208058",
                "wangxueke": "13513479926",
                "yuzhuoyi": "18624010360",
                "luojunhui": "18801281360",
                "fanjun": "15200827642",
                "zhangyong": "17600025055",
                'liukunyu': "18810931977"
            }
            username = name_phone_dict.get(username)

            data = {"mobiles": [username]}
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]

            return open_id
        except Exception as e:
            logger.error(f"get_userid异常:{e}\n")

    # Feishu alert bot.
    @classmethod
    def bot(cls, log_type, crawler, text, mark_name):
        """
        Post an interactive alert card mentioning one or more users.
        :param log_type: username key(s) for get_userid (a sequence of keys in
            the keyword-search branch)
        :param crawler: alert channel selector; decides webhook URL and sheet link
        :param text: alert body text
        :param mark_name: display name(s) for the @ mention
        """
        try:

            headers = {'Content-Type': 'application/json'}
            if crawler == "机器自动改造消息通知":
                url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
                users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
            elif crawler == "快手关键词搜索":
                url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=U1gySe"
                # NOTE(review): the loop variable `type` shadows the builtin;
                # it is the per-user log_type key paired with its display name.
                users = "".join([f'<at id="{cls.get_userid(type)}">{name}</at>' for type, name in
                                 zip(log_type, mark_name)])
            else:
                url = "https://open.feishu.cn/open-apis/bot/v2/hook/7928f182-08c1-4c4d-b2f7-82e10c93ca80"
                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
                users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
            data = json.dumps({
                "msg_type": "interactive",
                "card": {
                    "config": {
                        "wide_screen_mode": True,
                        "enable_forward": True
                    },
                    "elements": [{
                        "tag": "div",
                        "text": {
                            "content": users + text,
                            "tag": "lark_md"
                        }
                    }, {
                        "actions": [{
                            "tag": "button",
                            "text": {
                                "content": "详情,点击~~~~~",
                                "tag": "lark_md"
                            },
                            "url": sheet_url,
                            "type": "default",
                            "value": {}
                        }],
                        "tag": "action"
                    }],
                    "header": {
                        "title": {
                            "content": "📣消息提醒",
                            "tag": "plain_text"
                        }
                    }
                }
            })
            urllib3.disable_warnings()
            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
            logger.info(f'触发机器人消息:{r.status_code}, {text}')
        except Exception as e:
            logger.error(f"bot异常:{e}\n")

    # Feishu bot: "transformation plan finished" notification.
    @classmethod
    def finish_bot(cls, text, url, content):
        """
        Post a simple card (no @ mentions) to the given webhook.
        :param text: card body text
        :param url: webhook URL to post to
        :param content: card header title
        """
        try:
            headers = {'Content-Type': 'application/json'}
            data = json.dumps({
                "msg_type": "interactive",
                "card": {
                    "config": {
                        "wide_screen_mode": True,
                        "enable_forward": True
                    },
                    "elements": [{
                        "tag": "div",
                        "text": {
                            "content": text,
                            "tag": "lark_md"
                        }
                    }],
                    "header": {
                        "title": {
                            "content": content,
                            "tag": "plain_text"
                        }
                    }
                }
            })
            urllib3.disable_warnings()
            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
            logger.info(f'触发机器人消息:{r.status_code}, {text}')
        except Exception as e:
            logger.error(f"bot异常:{e}\n")
+
+
if __name__ == "__main__":
    # Fixed: Feishu.bot() takes (log_type, crawler, text, mark_name); the
    # original call omitted mark_name and raised TypeError.
    Feishu.bot('recommend', '抖音', '测试: 抖音cookie失效,请及时更换', 'zhangyong')
+

+ 95 - 0
utils/google_ai_studio.py

@@ -0,0 +1,95 @@
+import os
+import time
+import uuid
+from typing import  Optional, Tuple
+
+import cv2
+import google.generativeai as genai
+import orjson
+import requests
+from google.generativeai.types import (HarmBlockThreshold, HarmCategory)
+from loguru import logger
+
+
+
# Cache directory for downloaded videos. Overridable via the CACHE_DIR env var
# so the same code runs both locally and inside the container (the Dockerfile
# creates /app/cache); the default preserves the original local path.
CACHE_DIR = os.environ.get('CACHE_DIR', '/Users/z/Downloads/')

# Outbound HTTP(S) proxy used to reach Google AI Studio.
# NOTE(review): setting these at import time affects every HTTP call made by
# this process, not just Google traffic -- confirm this is intended.
PROXY_ADDR = os.environ.get('PROXY_ADDR', 'http://localhost:1081')
os.environ['http_proxy'] = PROXY_ADDR
os.environ['https_proxy'] = PROXY_ADDR
+
class GoogleAI(object):
    """Use Google Gemini to extract the spoken transcript of a video."""

    @classmethod
    def get_video_duration(cls, video_link: str) -> int:
        """Return the video duration in whole seconds, or 0 if unreadable."""
        cap = cv2.VideoCapture(video_link)
        try:
            if cap.isOpened():
                # Named constants instead of the original magic numbers 5/7.
                rate = cap.get(cv2.CAP_PROP_FPS)
                frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                if rate > 0:  # guard the ZeroDivisionError the original had for fps==0
                    return int(frame_num / rate)
            return 0
        finally:
            cap.release()  # the original leaked the capture handle

    @classmethod
    def download_video(cls, video_link: str) -> Optional[str]:
        """
        Download *video_link* into CACHE_DIR, retrying up to 3 times.
        :return: local file path on success, None otherwise
        """
        file_path = os.path.join(CACHE_DIR, f'{str(uuid.uuid4())}.mp4')
        for _ in range(3):
            try:
                response = requests.get(url=video_link, timeout=60)
                if response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    logger.info(f'[内容分析] 视频链接: {video_link}, 存储地址: {file_path}')
                    return file_path
            except Exception:
                time.sleep(1)
                continue
        return None

    @classmethod
    def run(cls, api_key, video_url):
        """
        Download the video, upload it to Gemini and return the transcript.
        :param api_key: Google AI Studio API key
        :param video_url: publicly reachable video URL
        :return: transcript text, "视频下载失败" if the download failed,
                 or None on any other failure
        """
        video_path = None
        try:
            genai.configure(api_key=api_key)
            video_path = cls.download_video(video_link=video_url)
            if not video_path:
                # Fixed: the original called os.path.exists(None) here, which
                # raised TypeError instead of returning the failure marker.
                logger.error(f'[内容分析] 视频下载失败, 跳过任务')
                return "视频下载失败"

            video = genai.upload_file(path=video_path, mime_type='video/mp4')
            # Poll until Gemini has finished ingesting the file.
            while video.state.name == 'PROCESSING':
                time.sleep(1)
                video = genai.get_file(name=video.name)
            if video.state.name != 'ACTIVE':
                genai.delete_file(name=video.name)
                return
            model = genai.GenerativeModel(
                model_name='gemini-1.5-flash',
                generation_config=genai.GenerationConfig(response_mime_type='application/json'),
                safety_settings={
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                },
            )
            response = model.generate_content(
                contents=[
                    video,
                    "你是一名专业的短视频分析师,请你输出这个视频的完整口播,只输出文字即可。使用一下JSON格式输出:{'text': string}",
                ],
                stream=False,
                request_options={
                    'timeout': 600,
                },
            )
            text = orjson.loads(response.text.strip())['text']
            genai.delete_file(name=video.name)
            return text
        except Exception as e:
            logger.error(f"[内容分析] 处理异常,异常信息{e}")
            return
        finally:
            # Fixed: the original leaked the local temp file on the
            # non-ACTIVE and exception paths.
            if video_path and os.path.exists(video_path):
                os.remove(video_path)
+
+
if __name__ == '__main__':
    # Manual smoke test against a real video URL.
    # NOTE(review): this API key is committed in plain text -- rotate it and
    # load it from the environment instead.
    GoogleAI.run("AIzaSyAwGqthDADh5NPVe3BMcOJBQkJaf0HWBuQ",
                 "http://rescdn.yishihui.com/jq_oss/video/2025012215472528213")
+

+ 80 - 0
utils/gpt4o_mimi.py

@@ -0,0 +1,80 @@
+import json
+
+import requests
+
+
class GPT4oMini(object):
    """Generate a senior-friendly video title with gpt-4o-mini via the
    internal AIGC proxy API."""

    @classmethod
    def get_ai_mini_title(cls, title):
        """
        Ask the model for a new title based on the video's spoken script.
        :param title: the video's transcript text
        :return: generated title string, or None on failure
        """
        url = "http://aigc-api.cybertogether.net//aigc/dev/test/gpt"
        payload = json.dumps({
            "imageList": [],
            "model": "gpt-4o-mini-2024-07-18",
            "prompt": (
                "你是一名短视频标题优化专家,任务是为短视频生成吸引力高且符合规范的标题。请你根据提供的视频口播内容,定位对于中国60岁以上老年人的核心吸引点与传播点,并根据以下要求输出利于老年人转发的标题。"
                "生成标题要求"
                 "1. 标题开头包含醒目emoji🔴,整体字符长度必须控制在10-25个字"
                 "2. 如果视频内容中包含具体的观点,标题则重点突出观点,并增加对观点的肯定或态度,示例:听完觉得太对了,太香了、老外至今难以相信。如果视频中不包含具体观点,可利用口播中最吸引人的内容,若有数字可突出数字,若有描述可突出描述。示例:这样做牛肉就像豆腐一样软,没牙也能吃;韩红慈善20年,累计捐款10个亿"
                 "3. 禁止编造任何信息,如视频中不包含养老金相关的内容,不能在生成的标题中出现养老金。"
                 "4. 标题结尾可以根据标题内容增加一些对观点的判断或情绪输出的内容,吸引人点击。请注意,结尾的内容要和前文由关联性,不能看起来毫无关联。示例:你们见过吗、你听对不对、请听、太神奇了、一字一句都是金、句句难以辩驳"
                 "5. 标题内不能使用强引导分享点击的词句,如:快来看看、大家都听一听、值得一看、都看看吧、你也来看看吧、大家注意、都听听等。"
                 "6. 标题内不能使用无实质信息和强烈诱导点击、紧急、夸张、震惊的描述,避免使用“震惊国人”、“速看”、“太震撼了”等类似描述。"
                 "7. 标题需要适应老年人的阅读习惯,尽量使用大白话,不要使用年轻一代的网络流行语,如:太燃了、佛系、躺平、内卷、炸裂等。"
                "输出要求:"
                "请基于优秀标题要求,参考优秀标题示例,生成吸引用户注意的标题。只需要输出3个标题即可,不需要更多标题,也不需要增加任何额外的内容。"
                "优秀标题分类及示例"
                "1、使用疑问和互动,引发好奇心:"
                "🔴街头采访趣事,朋友们你认为说的对不对 "
                "🔴原来这就是中国如此强大的原因!"
                "🔴穿什么裤子最显年轻? "
                "🔴真实的台湾,原来是这个样子的! "
                "🔴涨知识了,日本唯一不敢攻打的城市是这里"
                "2、关联个人利益和情感:"
                "🔴如果微信倒闭了,存的钱怎么办?大家注意! "
                "🔴不结婚不生孩子,老了之后会是什么下场? "
                "🔴这个视频勾起多少人童年的回忆,你还记得吗 "
                "🔴这八大建议一经提出,老百姓手都拍红了 "
                "3、文化和社会热点:"
                "🔴国庆为什么要放假,看完你就明白了! "
                "🔴淄博再次因为他而火!到底好不好,你来说了算! "
                "🔴1900架无人机表演,龙飞凤舞堪比春晚 "
                "4、对观点/话题的感叹/肯定:"
                "🔴这就是澳门的黄金发财树!太美了 "
                # Fixed: a stray garbled character after the closing quote on
                # the next line made the original file a SyntaxError.
                "🔴天地之间有杆称,说的真好❗"
                f"请分析该内容,视频脚本内容为:{title},返回新的标题。"
            ),
            "responseFormat": {
                "type": "json_schema",
                "json_schema": {
                    "strict": True,
                    "name": "share_script_result",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "新标题": {
                                "type": "string",
                                "description": "生成新的标题"
                            }
                        },
                        "required": ["新标题"],
                        "additionalProperties": False
                    }
                }
            }
        })
        headers = {'Content-Type': 'application/json'}
        try:
            # timeout added: the original request could hang indefinitely
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            response_data = response.json()
            data = json.loads(response_data.get('data', '{}'))
        except Exception:
            # network / JSON decoding failure -> no title
            return None
        # .get() avoids the KeyError the original raised when the key is missing;
        # empty strings are normalized to None, matching the original contract.
        return data.get("新标题") or None
+
if __name__ == '__main__':
    # Manual smoke test: request a title for a sample transcript.
    text ='主席的思想要传承,这个视频展现了一代伟人的伟大精神,真是让人感慨万千,我们享受的幸福生活,离不开那些为我们打下基础的人,尤其是伟大的毛主席,我们应该时刻铭记历史,珍惜来之不易的生活,如果你也和我一样深感震撼,想要让更多的人了解这段历史,就赶紧把这个视频分享给你所有的朋友和群聊吧,让我们一起传递这份感动,让更多的人铭记这份伟大的精神,祖国万岁,感谢有你'
    GPT4oMini.get_ai_mini_title(text)

+ 37 - 0
utils/odps_data.py

@@ -0,0 +1,37 @@
+import json
+from odps import ODPS
+
# ODPS (MaxCompute) service configuration.
# NOTE(review): access credentials are committed in plain text -- rotate them
# and load from environment variables / a secret store.
ODPS_CONFIG = {
    'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
    'ACCESSID': 'LTAIWYUujJAm7CbH',
    'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
}
class OdpsDataCount:
    """Query recommended-video rows from the MaxCompute (ODPS) log table."""
    @classmethod
    def get_data_count(cls, dt):
        """
        Fetch all rows of partition *dt* and serialize each as a JSON string.
        :param dt: partition value (looks like an hour string, e.g. %Y%m%d%H --
            TODO confirm against the table definition)
        :return: list of JSON strings; possibly partial if an error occurred
        """
        odps = ODPS(
            access_id=ODPS_CONFIG['ACCESSID'],
            secret_access_key=ODPS_CONFIG['ACCESSKEY'],
            project="loghubods",
            endpoint=ODPS_CONFIG['ENDPOINT']
        )
        data_values = []
        try:
            # NOTE(review): dt is interpolated directly into the SQL string;
            # safe only while dt comes from trusted code, not user input.
            sql = f'SELECT videoid,type,channel,time FROM loghubods.allaround_spider_recommend_video_hour WHERE dt = "{dt}" '
            with odps.execute_sql(sql).open_reader() as reader:
                for row in reader:
                    data_values.append(json.dumps( {"video_id": row[0], "type": row[1], "channel": row[2], "time": row[3], "partition": str(dt)}, ensure_ascii=False ))

        except Exception as e:
            print(f"An error occurred: {e}")
            return data_values
        return data_values

    @classmethod
    def main(cls, dt):
        # Thin wrapper kept as the entry point used by the workers.
        data_count = cls.get_data_count( dt= dt)
        return data_count
+
if __name__ == '__main__':
    # Fixed: the original called OdpsDataCount.main() with no argument, but
    # main() requires the partition `dt` -> TypeError. Use the current hour
    # as a sample partition (format assumed %Y%m%d%H -- TODO confirm).
    from datetime import datetime
    OdpsDataCount.main(datetime.now().strftime("%Y%m%d%H"))

+ 131 - 0
utils/piaoquan.py

@@ -0,0 +1,131 @@
+
+import requests
+from urllib.parse import urlencode
+import json
+
+
+
class PQ:
    """
    Piaoquan video-platform API helper: upload / re-recommend videos, fetch
    OSS paths and tag videos.

    NOTE(review): cookies and tokens are hard-coded below -- move them to
    configuration and rotate the committed values.
    """

    @classmethod
    def install_tj_pq(cls, video_id, new_video_path, new_title, n_id, cover_path):
        """
        Submit a new upload that re-recommends an existing video.
        :param video_id: id of the original video to re-recommend
        :param new_video_path: OSS path of the new video file
        :param new_title: title for the new video
        :param n_id: uploading account uid
        :param cover_path: OSS path of the cover image
        :return: new video id on success, None otherwise (code != 0)
        """
        url = "https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send?muid=999"
        payload = {
            'loginUid': n_id,
            'oldVideoReRecommendVideoId': video_id,
            'videoPath': new_video_path,
            'coverImgPath': cover_path,
            'appType': 999000,
            'viewStatus': 1,
            'versionCode': 100,
            'fileExtensions': 'mp4',
            'videoFromScene': 1,
            'title': new_title,
            'descr': "",
            'copyType': 2
        }
        headers = {
            'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
            'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78; JSESSIONID=3538C8F690744960BC2B4F02B4A3B1E4',
            'referer': 'http://appspeed.piaoquantv.com',
            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
            'accept-language': 'zh-CN,zh-Hans;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
        data = response.json()
        code = data["code"]
        if code == 0:
            new_video_id = data["data"]["id"]
            print(new_video_id)
            return new_video_id

    """
    新生成视频上传到对应账号下
    """
    @classmethod
    def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover_path):
        """
        Upload a newly generated video under the given account.
        :param new_video_path: OSS path of the video file
        :param new_title: title for the video
        :param n_id: uploading account uid
        :param cover_path: OSS path of the cover image
        :return: new video id on success, None otherwise
        """
        url = "https://videopre.piaoquantv.com/longvideoapi/crawler/video/send?muid=999"
        headers = {
            'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
            'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
            'referer': 'http://appspeed.piaoquantv.com',
            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
            'accept-language': 'zh-CN,zh-Hans;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        payload = {
            'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
            'fileExtensions': 'MP4',
            'loginUid': n_id,
            'networkType': 'Wi-Fi',
            'platform': 'iOS',
            'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
            'sessionId': '362290597725ce1fa870d7be4f46dcc2',
            'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
            'title': new_title,
            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
            'uid': n_id,
            'versionCode': '486',
            'versionName': '3.4.12',
            'videoFromScene': '1',
            'videoPath': new_video_path,
            'viewStatus': '1',
            'coverImgPath' : cover_path
        }
        encoded_payload = urlencode(payload)
        response = requests.request("POST", url, headers=headers, data=encoded_payload, timeout=30)
        data = response.json()
        code = data["code"]
        if code == 0:
            new_video_id = data["data"]["id"]
            print(new_video_id)
            return new_video_id
        return None

    @classmethod
    def get_pq_oss(cls,video_id):
        """
        Fetch the OSS video and cover paths of an existing video.
        :param video_id: Piaoquan video id (numeric or numeric string)
        :return: (video_path, cover_path), or (None, None) after 3 failed tries
        """
        try:
            url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/getBaseInfo"

            payload = json.dumps({
                "videoId": int(video_id)
            })
            headers = {
                'Content-Type': 'application/json',
                'Cookie': 'JSESSIONID=658158EABFCF6AC9B9BB0D8B61897A88'
            }
            # Retry up to 3 times; no back-off between attempts.
            for i in range(3):
                response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
                response = response.json()
                code = response['code']
                if code == 0:
                    data = response['data']
                    video_path = data["videoPath"]
                    cover_path = data["coverImgPath"]
                    return video_path, cover_path
            return None, None
        except Exception as e:
            return None, None

    @classmethod
    def video_tag(cls, pq_id: str, tag: str):
        """
        Attach tag names to a video; fire-and-forget (response is ignored).
        :param pq_id: Piaoquan video id
        :param tag: tag name(s) to attach
        """
        url = "https://admin.piaoquantv.com/manager/video/tag/addVideoTags"

        payload = json.dumps({
            "videoId": pq_id,
            "tagNames": tag
        })
        headers = {
            'Content-Type': 'application/json'
        }

        requests.request("POST", url, headers=headers, data=payload)
+
+
+
+
+
if __name__ == '__main__':
    # Manual smoke test: fetch the OSS paths for an existing video id.
    PQ.get_pq_oss(47377130)

+ 31 - 0
utils/redis.py

@@ -0,0 +1,31 @@
+from redis import asyncio as aioredis
+
+
class RedisHelper(object):
    """Process-wide singleton wrapper around an asyncio Redis connection pool."""

    # Shared pool, created lazily on first use.
    _pool: aioredis.connection.ConnectionPool = None
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Fixed: the original forwarded *args/**kwargs to object.__new__,
        # which raises TypeError as soon as any constructor argument is
        # passed. object.__new__ must be called with the class only.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def _get_pool(self) -> aioredis.connection.ConnectionPool:
        """Create the connection pool on first call, then reuse it."""
        if self._pool is None:
            # NOTE(review): host and password are hard-coded -- move them to
            # configuration and rotate the committed password.
            self._pool = aioredis.ConnectionPool(
                host="r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com",  # public endpoint
                port=6379,
                db=0,
                password="Wqsd@2019",
                max_connections=10)
        return self._pool

    def get_client(self) -> aioredis.client.Redis:
        """Return an asyncio Redis client backed by the shared pool."""
        pool = self._get_pool()
        client = aioredis.Redis(connection_pool=pool)

        return client

    async def close(self):
        """Disconnect all pool connections, including in-use ones."""
        if self._pool:
            await self._pool.disconnect(inuse_connections=True)

+ 0 - 0
workers/__init__.py


+ 97 - 0
workers/consumption_work.py

@@ -0,0 +1,97 @@
+import asyncio
+import time
+from datetime import datetime
+
+import orjson
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.cron import CronTrigger
+from loguru import logger
+
+from utils.feishu_utils import Feishu
+from utils.google_ai_studio import GoogleAI
+from utils.gpt4o_mimi import GPT4oMini
+from utils.piaoquan import PQ
+from utils.redis import RedisHelper
+
+
+class ConsumptionRecommend(object):
+    @classmethod
+    async def run(cls):
+        logger.info(f"[处理] 开始获取redis数据")
+        task = await RedisHelper().get_client().rpop(name = 'gong_ji_heng_ceng:scan_tasks')
+        if not task:
+            logger.info('[ 改造 ] 无待执行的扫描任务')
+            return
+        task = orjson.loads(task)
+        logger.info(f"[处理] 获取redis数据{task}")
+        video_id = task['video_id']
+        channel = task['channel']
+        logger.info(f"[处理] 开始获取原视频OSS地址")
+        video_path, cover_path = PQ.get_pq_oss(video_id)
+        if not video_path:
+            return
+        logger.info(f"[处理] 获取原视频OSS地址,视频链接:{video_path},封面链接:{cover_path}")
+        video_url = f"http://rescdn.yishihui.com/{video_path}"
+        if channel == "快手品类账号":
+            api_key = 'AIzaSyCTFPsbSfESF0Xybm8_qz7st_SH5E7wsdg'
+        elif channel == "抖音品类账号":
+            api_key = 'AIzaSyAJ8kUcEXRu37SuNx2w5qllaowMcUoPhoU'
+        elif channel == "抖音关键词抓取":
+            api_key = 'AIzaSyC-2Es4bk1uE-6u3lW5AOQuGqXWNzb92eQ'
+        elif channel == "快手关键词抓取":
+            api_key = 'AIzaSyD6R8tIOO11yh6WOXVQMBA2wzSZiREGUrA'
+        else:
+            api_key = 'AIzaSyAwGqthDADh5NPVe3BMcOJBQkJaf0HWBuQ'
+        logger.info(f"[处理] 开始提取口播文案")
+        text = GoogleAI.run(api_key, video_url)
+        if not text:
+            logger.error(f"[处理] 提取口播文案失败")
+            return
+        if "视频下载失败" == text:
+            logger.error(f"[处理] 视频下载失败")
+            return
+        logger.info(f"[处理] 提取口播文案完成")
+        logger.info(f"[处理] 口播文案通过gpt开始生成标题")
+        new_title = GPT4oMini.get_ai_mini_title(text)
+        if not new_title:
+            logger.error(f"[处理] 口播文案通过gpt无法生成标题")
+            return
+        logger.info(f"[处理] 口播文案通过gpt生成标题完成,{new_title}")
+        n_id = "78354423"
+        logger.info(f"[处理] 开始写入票圈后台")
+        code_vid = PQ.install_tj_pq(video_id, video_path, new_title, n_id, cover_path)
+        if not code_vid:
+            logger.error(f"[处理] 写入票圈后台失败")
+            return
+        logger.info(f"[处理] 写入票圈后台成功,视频id:{code_vid}")
+        tag = f"lev-供给,rol-机器,#str-内容理解优化标题_51,{video_id}"
+        PQ.video_tag(code_vid,tag)
+        logger.info(f"[处理] 视频写入标签成功")
+        current_time = datetime.now()
+        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+        logger.info(f"[处理] 开始写入飞书表格")
+        values = [[video_id,code_vid,new_title,channel,task["time"],task["partition"],formatted_time]]
+        Feishu.insert_columns("JY4esfYvShLbTkthHEqcTw5qnsh", "qFpmD4", "ROWS", 1, 2)
+        time.sleep(0.5)
+        Feishu.update_values("JY4esfYvShLbTkthHEqcTw5qnsh", "qFpmD4", "A2:Z2", values)
+        logger.info(f"[处理] 写入飞书表格成功")
+
+
+async def run():
+    scheduler = AsyncIOScheduler()
+    try:
+        scheduler.add_job(ConsumptionRecommend.run, trigger=CronTrigger(minute=3, second=0))  # 每小时获取一次
+        scheduler.start()
+        await asyncio.Event().wait()
+    except KeyboardInterrupt:
+        pass
+    except Exception as e:
+        pass
+    finally:
+        scheduler.shutdown()
+
+
+# Entry point: run a single consumption pass immediately.
+# The scheduled mode via run() is kept below, commented out.
+if __name__ == '__main__':
+    asyncio.run(ConsumptionRecommend.run())
+    # loop = asyncio.get_event_loop()
+    # loop.run_until_complete(run())

+ 54 - 0
workers/select_work.py

@@ -0,0 +1,54 @@
+import asyncio
+import datetime
+import time
+
+import orjson
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.cron import CronTrigger
+from loguru import logger
+
+from utils.feishu_utils import Feishu
+from utils.odps_data import OdpsDataCount
+from utils.redis import RedisHelper
+
+
+class StartGetRecommend(object):
+    @classmethod
+    async def run(cls):
+        dt = (datetime.datetime.now() - datetime.timedelta(hours=2)).strftime('%Y%m%d%H') # 获取前一小时
+        tasks = OdpsDataCount.get_data_count(dt)
+        logger.info(f"[获取] {dt}时间,共获取到{len(tasks)} 条")
+        if len(tasks) > 0:
+            await RedisHelper().get_client().rpush('gong_ji_heng_ceng:scan_tasks', *tasks)
+            logger.info(f"[获取] {dt}时间,共获取到{len(tasks)}条,写入redis成功")
+            logger.info(f"[获取] 开始写入飞书表格")
+            current_time = datetime.datetime.now()
+            formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+            for task in tasks:
+                task = orjson.loads(task)
+                values = [[task['partition'], task['video_id'], task['channel'], task['time'], task["type"],formatted_time]]
+                Feishu.insert_columns("JY4esfYvShLbTkthHEqcTw5qnsh", "f53916", "ROWS", 1, 2)
+                time.sleep(0.5)
+                Feishu.update_values("JY4esfYvShLbTkthHEqcTw5qnsh", "f53916", "A2:Z2", values)
+                logger.info(f"[处理] 写入飞书表格一条成功")
+            logger.info(f"[处理] 写入飞书表格全部成功")
+
+
+async def run():
+    scheduler = AsyncIOScheduler()
+    try:
+        scheduler.add_job(StartGetRecommend.run, trigger=CronTrigger(hour=1, second=0))  # 每小时获取一次
+        scheduler.start()
+        await asyncio.Event().wait()
+    except KeyboardInterrupt:
+        pass
+    except Exception as e:
+        pass
+    finally:
+        scheduler.shutdown()
+
+
+# Entry point: run a single producer pass immediately.
+# The scheduled mode via run() is kept below, commented out.
+if __name__ == '__main__':
+    asyncio.run(StartGetRecommend.run())
+    # loop = asyncio.get_event_loop()
+    # loop.run_until_complete(run())