
first commit

zhangliang, 2 weeks ago
Current commit: 42e9e11fbd
67 files changed, 3803 insertions and 0 deletions
  1. .gitignore (+62, -0)
  2. README.md (+19, -0)
  3. application/__init__.py (+0, -0)
  4. application/common/__init__.py (+6, -0)
  5. application/common/feishu/__init__.py (+3, -0)
  6. application/common/feishu/feishu.py (+725, -0)
  7. application/common/feishu/feishu_data.py (+20, -0)
  8. application/common/feishu/feishu_insert.py (+56, -0)
  9. application/common/feishu/feishu_utils.py (+398, -0)
  10. application/common/ffmpeg/__init__.py (+0, -0)
  11. application/common/ffmpeg/ffmpeg_utils.py (+76, -0)
  12. application/common/gpt/__init__.py (+1, -0)
  13. application/common/gpt/gpt4o_mini_help.py (+61, -0)
  14. application/common/log/__init__.py (+2, -0)
  15. application/common/log/aliyun_log.py (+81, -0)
  16. application/common/log/local_log.py (+54, -0)
  17. application/common/messageQueue/__init__.py (+3, -0)
  18. application/common/messageQueue/ack_message.py (+15, -0)
  19. application/common/messageQueue/consumer.py (+25, -0)
  20. application/common/messageQueue/mq.py (+51, -0)
  21. application/common/mysql/__init__.py (+1, -0)
  22. application/common/mysql/mysql_helper.py (+122, -0)
  23. application/common/mysql/sql.py (+57, -0)
  24. application/common/proxies/__init__.py (+2, -0)
  25. application/common/proxies/fast_proxy.py (+23, -0)
  26. application/common/redis/__init__.py (+0, -0)
  27. application/common/redis/pyredis.py (+55, -0)
  28. application/common/redis/redis_helper.py (+67, -0)
  29. application/common/redis/xng_redis.py (+54, -0)
  30. application/config/__init__.py (+3, -0)
  31. application/config/aliyun_config.py (+0, -0)
  32. application/config/config.py (+7, -0)
  33. application/config/ipconfig.py (+30, -0)
  34. application/config/mysql_config.py (+36, -0)
  35. application/config/topic_group_queue.py (+24, -0)
  36. application/etl/__init__.py (+0, -0)
  37. application/etl/download.py (+134, -0)
  38. application/functions/__init__.py (+3, -0)
  39. application/functions/appium_tools.py (+26, -0)
  40. application/functions/clean_title.py (+22, -0)
  41. application/functions/crypt.py (+3, -0)
  42. application/functions/get_redirect_url.py (+9, -0)
  43. application/functions/read_mysql_config.py (+46, -0)
  44. application/functions/zqkd_db_redis.py (+240, -0)
  45. application/items/__init__.py (+1, -0)
  46. application/items/item.py (+94, -0)
  47. application/pipeline/__init__.py (+2, -0)
  48. application/pipeline/pipeline.py (+273, -0)
  49. application/pipeline/pipeline_dev.py (+112, -0)
  50. configs/__init__.py (+0, -0)
  51. configs/codes.py (+61, -0)
  52. configs/config.py (+1, -0)
  53. configs/messages.py (+52, -0)
  54. configs/spiders_config.yaml (+59, -0)
  55. configs/topic_map.yaml (+3, -0)
  56. crawler_worker/__init__.py (+0, -0)
  57. crawler_worker/rabbitmq_consumer.py (+90, -0)
  58. crawler_worker/universal_crawler.py (+205, -0)
  59. main.py (+76, -0)
  60. pipelines/__init__.py (+0, -0)
  61. scheduler/__init__.py (+0, -0)
  62. scheduler/scheduler_main.py (+45, -0)
  63. utils/__init__.py (+0, -0)
  64. utils/config_loader.py (+37, -0)
  65. utils/extractors.py (+29, -0)
  66. utils/path_utils.py (+10, -0)
  67. utils/project_paths.py (+31, -0)

+ 62 - 0
.gitignore

@@ -0,0 +1,62 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+.idea/
+*.DS_Store

+ 19 - 0
README.md

@@ -0,0 +1,19 @@
+Overall flow:
+An MQ message is pushed
+   ↓
+main.py consumes it and spawns a thread
+   ↓
+handle_message() → parses the message body (platform + mode)
+   ↓
+Load user_list / rule_dict (from the database)
+   ↓
+Run UniversalCrawler.run()
+   ↓
+1. Read the config
+2. Request the video API
+3. Parse the video data (field mapping)
+4. Build a VideoItem
+5. Push it to the ETL MQ
+   ↓
+All steps succeed → ack the message
+Any failure → no ack; the MQ retries automatically
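
As a rough illustration of the flow above, here is a minimal, self-contained Python sketch. The message shape, the loader helpers, and the crawler class are stand-ins; the project's real UniversalCrawler, main.py, and MySQL helpers live elsewhere in this commit and may take different arguments.

    import json

    # Stand-ins for the real DB loaders and crawler class (illustration only).
    def load_user_list(platform, mode):
        return [{"uid": 1}]

    def load_rule_dict(platform, mode):
        return {"duration": {"min": 30}}

    class DummyCrawler:
        def __init__(self, platform, mode, rule_dict, user_list):
            self.platform, self.mode = platform, mode
            self.rule_dict, self.user_list = rule_dict, user_list

        def run(self):
            # 1. read the config  2. request the video API  3. map fields
            # 4. build a VideoItem  5. push it to the ETL MQ
            return True  # report overall success

    def handle_message(body, ack):
        msg = json.loads(body)                       # e.g. {"platform": "xigua", "mode": "recommend"}
        platform, mode = msg["platform"], msg["mode"]
        crawler = DummyCrawler(platform, mode,
                               load_rule_dict(platform, mode),
                               load_user_list(platform, mode))
        if crawler.run():
            ack()  # ack only after every step succeeded
        # on failure: no ack, so the MQ redelivers the message automatically

    if __name__ == "__main__":
        handle_message(b'{"platform": "xigua", "mode": "recommend"}', lambda: print("acked"))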

+ 0 - 0
application/__init__.py


+ 6 - 0
application/common/__init__.py

@@ -0,0 +1,6 @@
+from .feishu import Feishu, FeishuInsert
+from .log import *
+from .messageQueue import *
+from .mysql import *
+from .proxies import *
+from .redis import redis_helper

+ 3 - 0
application/common/feishu/__init__.py

@@ -0,0 +1,3 @@
+from .feishu import Feishu
+from .feishu_insert import FeishuInsert
+from .feishu_data import FsData

+ 725 - 0
application/common/feishu/feishu.py

@@ -0,0 +1,725 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+飞书表配置: token 鉴权 / 增删改查 / 机器人报警
+"""
+import json
+import os
+import sys
+import requests
+import urllib3
+
+sys.path.append(os.getcwd())
+
+from application.common.log import Local
+proxies = {"http": None, "https": None}
+
+
+class Feishu:
+    """
+    编辑飞书云文档
+    """
+    # 看一看爬虫数据表
+    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
+    # 快手爬虫数据表
+    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?"
+    # 微视爬虫数据表
+    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
+    # 小年糕爬虫数据表
+    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
+    # 音乐相册
+    music_album = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?"
+    # 本山祝福数据表
+    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
+    # 公众号爬虫表
+    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
+    # 数据监控表
+    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
+    # 微群视频爬虫表
+    crawler_weiqun_video = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc?"
+    # 视频号爬虫表
+    crawler_shipinhao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?'
+    # 西瓜视频
+    crawler_xigua = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?'
+    # 知乎 PC 端
+    crawler_zhihu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?'
+    # 吉祥幸福
+    crawler_jixiangxingfu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf?'
+    # 福小顺
+    crawler_fuxiaoshun = 'https://w42nne6hzg.feishu.cn/sheets/CoXEsl6MDhMaKKt6GUBcvLwsnWb?'
+    # 众妙音信
+    crawler_zmyx = 'https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve?'
+    # 岁岁年年迎福气
+    crawler_ssnnyfq = 'https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?'
+    # 祝福猫视频
+    crawler_zhufumao = 'https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g?'
+    # 宗教公众号
+    crawler_zongjiao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb?'
+    # 好看视频
+    crawler_haokan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd'
+    # 看到就是福气
+    crawler_kandaojiushifuqi = 'https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb'
+    # 胜胜影音
+    crawler_shengshengyingyin = 'https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe'
+    # 刚刚都传
+    crawler_ganggangdouchuan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx'
+    # 知青天天看
+    crawler_zhiqingtiantiankan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnjmhKdJOKdqnEzJcZb5xaHc?'
+    # 公众号_信欣
+    crawler_gongzhonghao = 'https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?'
+    # YouTube
+    crawler_youtube = 'https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?'
+    # 微信指数
+    weixinzhishu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?'
+    # 微信指数_搜索词
+    weixinzhishu_search_word = 'https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?'
+    # 海豚祝福
+    crawler_haitunzhufu = 'https://w42nne6hzg.feishu.cn/sheets/VbyAsUGq3h9TQ7tG3GpczGjhn1M?'
+
+    # 飞书路径token
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        """
+        :param crawler: 哪个爬虫
+        """
+        if crawler == "kanyikan":
+            return "shtcngRPoDYAi24x52j2nDuHMih"
+        elif crawler == "kuaishou":
+            return "shtcnICEfaw9llDNQkKgdymM1xf"
+        elif crawler == "weishi":
+            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
+        elif crawler == "xiaoniangao":
+            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+        elif crawler == "control":
+            return "shtcnlZWYazInhf7Z60jkbLRJyd"
+        elif crawler == "music_album":
+            return "shtcnT6zvmfsYe1g0iv4pt7855g"
+        elif crawler == "benshanzhufu":
+            return "shtcnGh2rrsPYM4iVNEBO7OqWrb"
+        elif crawler == "gzh":
+            return "shtcnexNXnpDLHhARw0QdiwbYuA"
+        elif crawler == "weiqun":
+            return "shtcnoKThNquYRweaylMFVyo9Hc"
+        elif crawler == 'shipinhao':
+            return 'shtcn9rOdZRAGFbRkWpn7hqEHGc'
+        elif crawler == 'xigua':
+            return 'shtcnvOpx2P8vBXiV91Ot1MKIw8'
+        elif crawler == 'zhihu':
+            return 'shtcnkGPBmGsjaqapgzouuj8MXe'
+        elif crawler == 'jixiangxingfu':
+            return 'shtcnSx4nafMbLTq7xl7RHBwHBf'
+        elif crawler == 'fuxiaoshun':
+            return 'CoXEsl6MDhMaKKt6GUBcvLwsnWb'
+        elif crawler == 'zhongmiaoyinxin':
+            return 'shtcnbZIxstPeM0xshW07b26sve'
+        elif crawler == 'suisuiniannianyingfuqi':
+            return 'shtcnyJmJSJynHDLLbLTkySfvZe'
+        elif crawler == 'zhufumao':
+            return 'shtcnXfIJthvkjhI5zlEJq84i6g'
+        elif crawler == 'zongjiao':
+            return 'shtcn73NW0CyoOeF21HWO15KBsb'
+        elif crawler == 'haokan':
+            return 'shtcnaYz8Nhv8q6DbWtlL6rMEBd'
+        elif crawler == 'kandaojiushifuqi':
+            return 'shtcnEokBkIjOUPAk8vbbPKnXgb'
+        elif crawler == 'shengshengyingyin':
+            return 'shtcnz1ymxHL1u8WHblfqfys7qe'
+        elif crawler == 'ganggangdouchuan':
+            return 'shtcnTuJgeZU2bc7VaesAqk3QJx'
+        elif crawler == 'youtube':
+            return 'shtcnrLyr1zbYbhhZyqpN7Xrd5f'
+        elif crawler == 'weixinzhishu':
+            return 'shtcnqhMRUGunIfGnGXMOBYiy4K'
+        elif crawler == 'weixinzhishu_search_word':
+            return 'shtcnHxCj6dZBYMuK1Q3tIJVlqg'
+        elif crawler == 'gongzhonghao':
+            return 'shtcna98M2mX7TbivTj9Sb7WKBN'
+        elif crawler == 'douyin':
+            return 'shtcnhq63MoXOpqbkuLuoapYIAh'
+        elif crawler == 'zhiqingtiantiankan':
+            return 'shtcnjmhKdJOKdqnEzJcZb5xaHc'
+        elif crawler == 'haitunzhufu':
+            return 'VbyAsUGq3h9TQ7tG3GpczGjhn1M'
+
+    # 获取飞书api token
+    @classmethod
+    def get_token(cls, log_type, crawler):
+        """
+        获取飞书api token
+        :return:
+        """
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+
+        try:
+            urllib3.disable_warnings()
+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+            tenant_access_token = response.json()["tenant_access_token"]
+            return tenant_access_token
+        except Exception as e:
+            Local.logger(log_type, crawler).error("获取飞书 api token 异常:{}", e)
+
+    # 获取表格元数据
+    @classmethod
+    def get_metainfo(cls, log_type, crawler):
+        """
+        获取表格元数据
+        :return:
+        """
+        try:
+            get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/metainfo"
+
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+                "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            return response
+        except Exception as e:
+            Local.logger(log_type, crawler).error("获取表格元数据异常:{}", e)
+
+    # 读取工作表中所有数据
+    @classmethod
+    def get_values_batch(cls, log_type, crawler, sheetid):
+        """
+        读取工作表中所有数据
+        :param log_type: 启用哪个 log
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张表
+        :return: 所有数据
+        """
+        try:
+            get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                   + cls.spreadsheettoken(crawler) + "/values_batch_get"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
+                "ranges": sheetid,
+
+                # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+                # valueRenderOption=FormattedValue 计算并格式化单元格;
+                # valueRenderOption=Formula单元格中含有公式时返回公式本身;
+                # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
+                "valueRenderOption": "ToString",
+
+                # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+                "dateTimeRenderOption": "",
+
+                # 返回的用户id类型,可选open_id,union_id
+                "user_id_type": "open_id"
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            response = json.loads(r.content.decode("utf8"))
+            values = response["data"]["valueRanges"][0]["values"]
+            return values
+        except Exception as e:
+            Local.logger(log_type, crawler).error("读取工作表所有数据异常:{}", e)
+
+    # 工作表,插入行或列
+    @classmethod
+    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
+        """
+        工作表插入行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param majordimension:行或者列, ROWS、COLUMNS
+        :param startindex:开始位置
+        :param endindex:结束位置
+        """
+        try:
+            insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                 + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                    "startIndex": startindex,  # 开始的位置
+                    "endIndex": endindex  # 结束的位置
+                },
+                "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+            }
+
+            urllib3.disable_warnings()
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Local.logger(log_type, crawler).info("插入行或列:{}", r.json()["msg"])
+        except Exception as e:
+            Local.logger(log_type, crawler).error("插入行或列异常:{}", e)
+
+    # 写入数据
+    @classmethod
+    def update_values(cls, log_type, crawler, sheetid, ranges, values):
+        """
+        写入数据
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param ranges:单元格范围
+        :param values:写入的具体数据,list
+        """
+        try:
+            update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                + cls.spreadsheettoken(crawler) + "/values_batch_update"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "valueRanges": [
+                    {
+                        "range": sheetid + "!" + ranges,
+                        "values": values
+                    },
+                ],
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Local.logger(log_type, crawler).info("写入数据:{}", r.json()["msg"])
+        except Exception as e:
+            Local.logger(log_type, crawler).error("写入数据异常:{}", e)
+
+    # 合并单元格
+    @classmethod
+    def merge_cells(cls, log_type, crawler, sheetid, ranges):
+        """
+        合并单元格
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:哪张工作表
+        :param ranges:需要合并的单元格范围
+        """
+        try:
+            merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/merge_cells"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+
+            body = {
+                "range": sheetid + "!" + ranges,
+                "mergeType": "MERGE_ROWS"
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Local.logger(log_type, crawler).info("合并单元格:{}", r.json()["msg"])
+        except Exception as e:
+            Local.logger(log_type, crawler).error("合并单元格异常:{}", e)
+
+    # 读取单元格数据
+    @classmethod
+    def get_range_value(cls, log_type, crawler, sheetid, cell):
+        """
+        读取单元格内容
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张工作表
+        :param cell: 哪个单元格
+        :return: 单元格内容
+        """
+        try:
+            get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+                # valueRenderOption=FormattedValue 计算并格式化单元格;
+                # valueRenderOption=Formula 单元格中含有公式时返回公式本身;
+                # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
+                "valueRenderOption": "FormattedValue",
+
+                # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+                "dateTimeRenderOption": "",
+
+                # 返回的用户id类型,可选open_id,union_id
+                "user_id_type": "open_id"
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            return r.json()["data"]["valueRange"]["values"][0]
+        except Exception as e:
+            Local.logger(log_type, crawler).error("读取单元格数据异常:{}", e)
+
+    # 获取表内容
+    @classmethod
+    def get_sheet_content(cls, log_type, crawler, sheet_id):
+        try:
+            sheet = Feishu.get_values_batch(log_type, crawler, sheet_id)
+            content_list = []
+            for x in sheet:
+                for y in x:
+                    if y is None:
+                        pass
+                    else:
+                        content_list.append(y)
+            return content_list
+        except Exception as e:
+            Local.logger(log_type, crawler).error(f'get_sheet_content:{e}\n')
+
+    # 删除行或列,可选 ROWS、COLUMNS
+    @classmethod
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
+        """
+        删除行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:工作表
+        :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex:开始的位置
+        :param endindex:结束的位置
+        :return:
+        """
+        try:
+            dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": major_dimension,
+                    "startIndex": startindex,
+                    "endIndex": endindex
+                }
+            }
+            urllib3.disable_warnings()
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Local.logger(log_type, crawler).info("删除视频数据:{}", r.json()["msg"])
+        except Exception as e:
+            Local.logger(log_type, crawler).error("删除视频数据异常:{}", e)
+
+    # 获取用户 ID
+    @classmethod
+    def get_userid(cls, log_type, crawler, username):
+        try:
+            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            name_phone_dict = {
+                "xinxin": "15546206651",
+                "muxinyi": "13699208058",
+                "wangxueke": "13513479926",
+                "yuzhuoyi": "18624010360",
+                "luojunhui": "18801281360",
+                "fanjun": "15200827642",
+                "zhangyong": "17600025055"
+            }
+
+            # if username == "wangkun":
+            #     username = "13426262515"
+            # # elif username == "gaonannan":
+            # #     username = "18501180073"
+            # elif username == "xinxin":
+            #     username = "15546206651"
+            # # elif username == "huxinxue":
+            # #     username = "18832292015"
+            # # elif username == "wuchaoyue":
+            # #     username = "15712941385"
+            # elif username == "muxinyi":
+            #     username = '13699208058'
+            # elif username == "wangxueke":
+            #     username = '13513479926'
+            # elif username == "yuzhuoyi":
+            #     username = '18624010360'
+            # elif username == "luojunhui":
+            #     username = '18801281360'
+            username = name_phone_dict.get(username)
+
+            data = {"mobiles": [username]}
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
+            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
+            # Common.logger(log_type, crawler).info(f"{username}:{open_id}")
+            # print(f"{username}:{open_id}")
+            return open_id
+        except Exception as e:
+            Local.logger(log_type, crawler).error(f"get_userid异常:{e}\n")
+
+    # 飞书机器人
+    @classmethod
+    def bot(cls, log_type, crawler, text):
+        try:
+            url = "https://open.feishu.cn/open-apis/bot/v2/hook/96989577-50e7-4653-9ec2-308fe3f2c5fe"
+            headers = {'Content-Type': 'application/json'}
+            if crawler == "kanyikan":
+                content = "看一看爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+            elif crawler == "jixiangxingfu":
+                content = text
+                sheet_url = ""
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangxueke")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            # elif crawler == "weixinzhishu_out":
+            #     content = "微信指数_站外指数"
+            #     sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=YVuVgQ"
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
+            # elif crawler == "weixinzhishu_inner_sort":
+            #     content = "微信指数_站内短期指数"
+            #     sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=DrZHpa"
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
+            # elif crawler == "weixinzhishu_inner_long":
+            #     content = "微信指数_站内长期指数"
+            #     sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=JpgyAv"
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
+            #
+            # elif crawler == "weixinzhishu" and text == "今日微信指数抓取完毕":
+            #     content = "微信指数"
+            #     sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k"
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "yuzhuoyi")) + "></at>\n"
+            # elif crawler == "weixinzhishu":
+            #     content = "微信指数"
+            #     sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k"
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "muxinyi")) + "></at>\n"
+
+            elif crawler == "xiaoniangao_hour":
+                content = "小年糕_小时级_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+            elif crawler == "xiaoniangao_person":
+                content = "小年糕_用户主页_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=Wu0CeL"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+            elif crawler == "xiaoniangao_play":
+                content = "小年糕_播放量_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=c85k1C"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            elif crawler == 'xigua' and log_type == "recommend":
+                content = '西瓜视频_推荐_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=ZzsClu'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wangxueke")) + "></at>\n"
+            # elif crawler == 'xigua':
+            #     content = '西瓜视频_用户主页_已下载表'
+            #     sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=e075e9'
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            # elif crawler == 'xigua_little_video':
+            #     content = '西瓜视频_小视频_已下载表'
+            #     sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=hDSDnv'
+            #     users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+            #         cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == 'zhihu_hot':
+                content = '知乎_热门_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=8871e3'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+            elif crawler == 'zhihu_follow':
+                content = '知乎_定向_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=4MGuux'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == 'haokan_hot':
+                content = '好看_热榜_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=5pWipX'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == 'haokan_channel':
+                content = '好看_频道_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=7f05d8'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == 'haokan_follow':
+                content = '好看_定向_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=kVaSjf'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == "music_album":
+                content = "音乐相册爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "ssyy":
+                content = "胜胜影音爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "ggdc":
+                content = "刚刚都传爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            elif crawler == "bszf":
+                content = "本山祝福爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            elif crawler == "jxxf":
+                content = "吉祥幸福爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            elif crawler == "zmyx":
+                content = "众妙音信爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "zhangyong")) + "></at>\n"
+
+            elif crawler == "zhufumao":
+                content = "祝福猫视频爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "kuaishou_follow":
+                content = "快手_用户主页_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=fYdA8F"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+            elif crawler == "kuaishou_recommend":
+                content = "快手_推荐榜_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=3cd128"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "ssnnyfq":
+                content = "岁岁年年迎福气_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?sheet=290bae"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "luojunhui")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "kdjsfq":
+                content = "看到就是福气_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb?sheet=ad3b6d"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "gzh":
+                content = "公众号爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "huxinxue")) + "></at>\n"
+
+            elif crawler == "gongzhonghao":
+                content = "公众号_信欣_爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?"
+                users = f"\n<at id={str(cls.get_userid(log_type, crawler, 'fanjun'))}></at> <at id={str(cls.get_userid(log_type, crawler, 'wangxueke'))}></at> <at id={str(cls.get_userid(log_type, crawler, 'luojunhui'))}></at>\n"
+
+            elif crawler == "weiqun":
+                content = "微群爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "weishi":
+                content = "微视爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "shipinhao_recommend":
+                content = "视频号_推荐_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?sheet=c77cf9"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == "shipinhao_follow":
+                content = "视频号_定向_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?sheet=KsVtLe"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == "youtube":
+                content = "youtube_定向_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?sheet=GVxlYk"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == "zongjiao":
+                content = "宗教公众号爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "huxinxue")) + "></at>\n"
+
+            else:
+                content = "小年糕爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n"
+
+            data = json.dumps({
+                "msg_type": "interactive",
+                "card": {
+                    "configs": {
+                        "wide_screen_mode": True,
+                        "enable_forward": True
+                    },
+                    "elements": [{
+                        "tag": "div",
+                        "text": {
+                            "content": users + text,
+                            "tag": "lark_md"
+                        }
+                    }, {
+                        "actions": [{
+                            "tag": "button",
+                            "text": {
+                                "content": content,
+                                "tag": "lark_md"
+                            },
+                            "url": sheet_url,
+                            "type": "default",
+                            "value": {}
+                        }],
+                        "tag": "action"
+                    }],
+                    "header": {
+                        "title": {
+                            "content": "📣您有新的信息,请注意查收",
+                            "tag": "plain_text"
+                        }
+                    }
+                }
+            })
+            urllib3.disable_warnings()
+            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
+            Local.logger(log_type, crawler).info(f'触发机器人消息:{r.status_code}, {text}')
+        except Exception as e:
+            Local.logger(log_type, crawler).error(f"bot异常:{e}\n")
+
+
+if __name__ == "__main__":
+    Feishu.bot('recommend', 'xigua', '测试: 西瓜推荐,登录失效')
+    # print(Feishu.get_userid('bot', 'weixinzhishu', 'wangkun'))
+    # print(Feishu.get_userid('bot', 'weixinzhishu', 'yuzhuoyi'))

+ 20 - 0
application/common/feishu/feishu_data.py

@@ -0,0 +1,20 @@
+from application.common.feishu.feishu_utils import FeishuUtils
+
+
+class FsData:
+
+    def get_title_rule(self):
+        summary = FeishuUtils.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "BS9uyu")
+        # Skip empty cells and return the first non-empty title rule below the header row
+        for row in summary[1:]:
+            title_rule = row[0]
+            if title_rule:
+                return title_rule
+        return None
+
+
+if __name__ == '__main__':
+    data_rule = FsData()
+    title_rule = data_rule.get_title_rule()
+    print(title_rule)

+ 56 - 0
application/common/feishu/feishu_insert.py

@@ -0,0 +1,56 @@
+"""
+feishu python方法
+"""
+
+import requests
+
+
+def get_app_token():
+    """
+    获取飞书api token
+    :return:
+    """
+    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+    post_data = {
+        "app_id": "cli_a51114cf8bf8d00c",  # 这里账号密码是发布应用的后台账号及密码
+        "app_secret": "cNoTAqMpsAm7mPBcpCAXFfvOzCNL27fe",
+    }
+    response = requests.request("POST", url=url, data=post_data)
+    tenant_access_token = response.json()["tenant_access_token"]
+    return tenant_access_token
+
+
+class FeishuInsert(object):
+    """
+    feishu Python Object
+    """
+
+    def __init__(self, document_token):
+        self.headers = {"Content-Type": "application/json"}
+        self.document_token = document_token
+
+    def insert_value(self, sheet_id, ranges, values):
+        """
+        在表的某一个sheet的ranges中插入数据,若该地方存在数据,会自动把已有的数据往下移动,再写如数据
+        :param sheet_id: 飞书表的唯一ID
+        :param ranges: 单元格位置的range, 从左上角到右下角, 两边都是闭区间
+        :param values: 二维数组, 用于填充ranges的空格数组
+        """
+        insert_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{}/values_prepend".format(
+            self.document_token)
+        # print(get_app_token())
+        headers = {
+            "Authorization": "Bearer " + get_app_token(),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "valueRange": {
+                "range": "{}!{}".format(sheet_id, ranges),
+                "values": values
+            }
+        }
+        response = requests.request("POST", url=insert_value_url, headers=headers, json=body)
+        print(response.json())
+
+
+
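
For reference, a minimal usage sketch of the FeishuInsert class defined above; the document token, sheet id, and cell range below are placeholders for illustration, not values from this repository, and the request will only succeed against a real spreadsheet the app can access.

    from application.common.feishu.feishu_insert import FeishuInsert

    # Placeholder document token, sheet id, and range -- replace with real values.
    inserter = FeishuInsert(document_token="shtcnExampleDocToken")
    inserter.insert_value(
        sheet_id="0pgfbh",      # hypothetical sheet id
        ranges="A2:C2",         # the new row is prepended into columns A..C
        values=[["2024-01-01", "example title", "https://example.com/video"]],
    )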

+ 398 - 0
application/common/feishu/feishu_utils.py

@@ -0,0 +1,398 @@
+# -*- coding: utf-8 -*-
+# @Time: 2023/12/26
+"""
+飞书表配置: token 鉴权 / 增删改查 / 机器人报警
+"""
+import json
+import os
+import sys
+import requests
+import urllib3
+from loguru import logger
+
+sys.path.append(os.getcwd())
+
+proxies = {"http": None, "https": None}
+
+
+class FeishuUtils:
+    """
+    编辑飞书云文档
+    """
+    succinct_url = "https://w42nne6hzg.feishu.cn/sheets/"
+    # 飞书路径token
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        if crawler == "summary":
+            return "KsoMsyP2ghleM9tzBfmcEEXBnXg"
+        else:
+            return crawler
+
+
+
+    # 获取飞书api token
+    @classmethod
+    def get_token(cls):
+        """
+        获取飞书api token
+        :return:
+        """
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+        urllib3.disable_warnings()
+        response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+        tenant_access_token = response.json()["tenant_access_token"]
+        return tenant_access_token
+
+    # 获取表格元数据
+    @classmethod
+    def get_metainfo(cls, crawler):
+        """
+        获取表格元数据
+        :return:
+        """
+        try:
+            get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/metainfo"
+
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+                "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            return response
+        except Exception as e:
+            logger.error("获取表格元数据异常:{}", e)
+
+    # 读取工作表中所有数据
+    @classmethod
+    def get_values_batch(cls, crawler, sheetid):
+        """
+        读取工作表中所有数据
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张表
+        :return: 所有数据
+        """
+
+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            "ranges": sheetid,
+            "valueRenderOption": "ToString",
+            "dateTimeRenderOption": "",
+            "user_id_type": "open_id"
+        }
+        urllib3.disable_warnings()
+        r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+        response = json.loads(r.content.decode("utf8"))
+        values = response["data"]["valueRanges"][0]["values"]
+        return values
+
+
+    # 工作表,插入行或列
+    @classmethod
+    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
+        """
+        工作表插入行或列
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param majordimension:行或者列, ROWS、COLUMNS
+        :param startindex:开始位置
+        :param endindex:结束位置
+        """
+        try:
+            insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                 + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                    "startIndex": startindex,  # 开始的位置
+                    "endIndex": endindex  # 结束的位置
+                },
+                "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+            }
+
+            urllib3.disable_warnings()
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+        except Exception as e:
+            logger.error("插入行或列异常:{}", e)
+
+    # 写入数据
+    @classmethod
+    def update_values(cls, crawler, sheetid, ranges, values):
+        """
+        写入数据
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param ranges:单元格范围
+        :param values:写入的具体数据,list
+        """
+        try:
+            update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                + cls.spreadsheettoken(crawler) + "/values_batch_update"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "valueRanges": [
+                    {
+                        "range": sheetid + "!" + ranges,
+                        "values": values
+                    },
+                ],
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+        except Exception as e:
+            logger.error("写入数据异常:{}", e)
+
+    # 合并单元格
+    @classmethod
+    def merge_cells(cls, crawler, sheetid, ranges):
+        """
+        合并单元格
+        :param crawler: 哪个爬虫
+        :param sheetid:哪张工作表
+        :param ranges:需要合并的单元格范围
+        """
+        try:
+            merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/merge_cells"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+
+            body = {
+                "range": sheetid + "!" + ranges,
+                "mergeType": "MERGE_ROWS"
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+        except Exception as e:
+            logger.error("合并单元格异常:{}", e)
+
+    # 读取单元格数据
+    @classmethod
+    def get_range_value(cls, crawler, sheetid, cell):
+        """
+        读取单元格内容
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张工作表
+        :param cell: 哪个单元格
+        :return: 单元格内容
+        """
+        try:
+            get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "valueRenderOption": "FormattedValue",
+
+                # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+                "dateTimeRenderOption": "",
+
+                # 返回的用户id类型,可选open_id,union_id
+                "user_id_type": "open_id"
+            }
+            urllib3.disable_warnings()
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # logger.error(r.text)
+            return r.json()["data"]["valueRange"]["values"][0]
+        except Exception as e:
+            logger.error("读取单元格数据异常:{}", e)
+    # 获取表内容
+    @classmethod
+    def get_sheet_content(cls, crawler, sheet_id):
+        try:
+            sheet = cls.get_values_batch(crawler, sheet_id)
+            content_list = []
+            for x in sheet:
+                for y in x:
+                    if y is None:
+                        pass
+                    else:
+                        content_list.append(y)
+            return content_list
+        except Exception as e:
+            logger.error(f'get_sheet_content:{e}\n')
+
+    # 删除行或列,可选 ROWS、COLUMNS
+    @classmethod
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
+        """
+        删除行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:工作表
+        :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex:开始的位置
+        :param endindex:结束的位置
+        :return:
+        """
+        try:
+            dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": major_dimension,
+                    "startIndex": startindex,
+                    "endIndex": endindex
+                }
+            }
+            urllib3.disable_warnings()
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+        except Exception as e:
+            logger.error("删除视频数据异常:{}", e)
+
+    # 获取用户 ID
+    @classmethod
+    def get_userid(cls, username):
+        try:
+            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            name_phone_dict = {
+                "xinxin": "15546206651",
+                "muxinyi": "13699208058",
+                "wangxueke": "13513479926",
+                "yuzhuoyi": "18624010360",
+                "luojunhui": "18801281360",
+                "fanjun": "15200827642",
+                "zhangyong": "17600025055",
+                'liukunyu': "18810931977"
+            }
+            username = name_phone_dict.get(username)
+
+            data = {"mobiles": [username]}
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
+            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
+
+            return open_id
+        except Exception as e:
+            pass
+            # logger.error(f"get_userid异常:{e}\n")
+
+    # 飞书机器人
+    @classmethod
+    def bot(cls, log_type, crawler, text, mark_name):
+        try:
+
+            headers = {'Content-Type': 'application/json'}
+            if crawler == "机器自动改造消息通知":
+                url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
+                users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
+            elif crawler == "快手关键词搜索":
+                url = "https://open.feishu.cn/open-apis/bot/v2/hook/e7697dc6-5254-4411-8b59-3cd0742bf703"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=U1gySe"
+                users = "".join([f'<at id="{cls.get_userid(type)}">{name}</at>' for type, name in
+                                 zip(log_type, mark_name)])
+                # users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
+            else:
+                url = "https://open.feishu.cn/open-apis/bot/v2/hook/7928f182-08c1-4c4d-b2f7-82e10c93ca80"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/KsoMsyP2ghleM9tzBfmcEEXBnXg?sheet=bc154d"
+                users = f"<at id=" + str(cls.get_userid(log_type)) + f">{mark_name}</at>"
+            data = json.dumps({
+                "msg_type": "interactive",
+                "card": {
+                    "configs": {
+                        "wide_screen_mode": True,
+                        "enable_forward": True
+                    },
+                    "elements": [{
+                        "tag": "div",
+                        "text": {
+                            "content": users + text,
+                            "tag": "lark_md"
+                        }
+                    }, {
+                        "actions": [{
+                            "tag": "button",
+                            "text": {
+                                "content": "详情,点击~~~~~",
+                                "tag": "lark_md"
+                            },
+                            "url": sheet_url,
+                            "type": "default",
+                            "value": {}
+                        }],
+                        "tag": "action"
+                    }],
+                    "header": {
+                        "title": {
+                            "content": "📣消息提醒",
+                            "tag": "plain_text"
+                        }
+                    }
+                }
+            })
+            urllib3.disable_warnings()
+            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
+        except Exception as e:
+            logger.error(f"bot异常:{e}\n")
+
+    # 飞书机器人-改造计划完成通知
+    @classmethod
+    def finish_bot(cls, text, url, content):
+        try:
+            headers = {'Content-Type': 'application/json'}
+            data = json.dumps({
+                "msg_type": "interactive",
+                "card": {
+                    "configs": {
+                        "wide_screen_mode": True,
+                        "enable_forward": True
+                    },
+                    "elements": [{
+                        "tag": "div",
+                        "text": {
+                            "content": text,
+                            "tag": "lark_md"
+                        }
+                    }],
+                    "header": {
+                        "title": {
+                            "content": content,
+                            "tag": "plain_text"
+                        }
+                    }
+                }
+            })
+            urllib3.disable_warnings()
+            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
+        except Exception as e:
+            logger.error(f"bot异常:{e}\n")
+
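A quick usage sketch for the two notification helpers above. The enclosing class name sits outside this excerpt, so `Feishu` below is only an assumption, and the webhook URL passed to `finish_bot` is a placeholder; all message texts are illustrative:

from application.common.feishu.feishu import Feishu  # class name assumed, see the top of feishu.py

# mention one person and post to the "机器自动改造消息通知" bot webhook
Feishu.bot(log_type="wangxueke", crawler="机器自动改造消息通知",
           text="\n今日改造任务已完成,请查收。", mark_name="wangxueke")

# completion notice with a custom webhook and card title (url is a placeholder)
Feishu.finish_bot(text="今日视频改造 120 条,全部成功。",
                  url="https://open.feishu.cn/open-apis/bot/v2/hook/<your-hook-id>",
                  content="📣改造完成通知")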

+ 0 - 0
application/common/ffmpeg/__init__.py


+ 76 - 0
application/common/ffmpeg/ffmpeg_utils.py

@@ -0,0 +1,76 @@
+import requests
+import json
+
+class Ffmpeg:
+
+    def get_oss_link(self, oss_key):
+        url = "http://61.48.133.26:5555/api/v1/oss/get_object_link"
+
+        payload = json.dumps({
+            "oss_object_key": oss_key
+        })
+        headers = {
+            'Authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNGNhMTI4ZGYtYWMzMy00NWQ2LTg3MmEtMDAzOTk4MGVhM2ViIiwibmFtZSI6Inp5IiwiZXhwIjoyMDUwOTI3MjExfQ.k_rvuESjA62RgPDiLniVgJyLJn3Q8C1Y_AGq3CPRuKI',
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        data = response['data']
+        return data
+
+    def merge_m3u8(self,url_link):
+        url = "http://101.37.24.17:5555/api/v1/ffmpeg/merge_m3u8"
+
+        data = {
+            "url": url_link,
+            "referer": ""
+        }
+        headers = {
+            'Authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNGNhMTI4ZGYtYWMzMy00NWQ2LTg3MmEtMDAzOTk4MGVhM2ViIiwibmFtZSI6Inp5IiwiZXhwIjoyMDUwOTI3MjExfQ.k_rvuESjA62RgPDiLniVgJyLJn3Q8C1Y_AGq3CPRuKI',
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, json=data, stream=True)
+        for item in response.content.split(b'\r\n\r\n'):
+            try:
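+                # each chunk is assumed to start with a 6-byte "data: " prefix (hence item[6:]) before the JSON payload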
+                item = json.loads(item[6:].decode())
+                if item['event'] == 'message':
+                    continue
+                elif item['event'] == 'ffmpeg code':
+                    code = int(item['data'])
+                    if code != 0:  # ffmpeg处理异常
+                        return
+                elif item['event'] == 'result':
+                    oss_object_key = item['data']['oss_object_key']
+                    if oss_object_key:
+                        oss_url = self.get_oss_link(oss_object_key)
+                        return oss_url
+            except json.decoder.JSONDecodeError:
+                continue
+
+    def webp2_jpg(self,webp2_url):
+        url = "http://101.37.24.17:5555/api/v1/ffmpeg/webp2jpg"
+
+        payload = json.dumps({
+            "url": webp2_url,
+            "referer": ""
+        })
+        headers = {
+            'Authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNGNhMTI4ZGYtYWMzMy00NWQ2LTg3MmEtMDAzOTk4MGVhM2ViIiwibmFtZSI6Inp5IiwiZXhwIjoyMDUwOTI3MjExfQ.k_rvuESjA62RgPDiLniVgJyLJn3Q8C1Y_AGq3CPRuKI',
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        oss_object_key = response['data']['oss_object_key']
+        if oss_object_key:
+            oss_url = self.get_oss_link(oss_object_key)
+            return oss_url
+        else:
+            return None
+
+
+if __name__ == '__main__':
+    ffmpeg = Ffmpeg()
+    print(ffmpeg.get_oss_link("jq_oss/video/20250103135417425230.mp4"))

+ 1 - 0
application/common/gpt/__init__.py

@@ -0,0 +1 @@
+from .gpt4o_mini_help import GPT4oMini

+ 61 - 0
application/common/gpt/gpt4o_mini_help.py

@@ -0,0 +1,61 @@
+import json
+
+import requests
+
+
+class GPT4oMini:
+
+    @classmethod
+    def get_ai_mini_title(cls, title):
+        url = "http://aigc-api.cybertogether.net//aigc/dev/test/gpt"
+        payload = json.dumps({
+            "imageList": [],
+            "model": "gpt-4o-mini-2024-07-18",
+            "prompt": (
+            "针对微信平台视频类小程序场景"
+            "面向人群是中国中老年人,在单聊、群聊场景。为视频生成一个吸引人的标题。每次生成我会提供一个原标题,你通过以下规则生成一个新的标题。"
+            "生成规则:"
+            "a.生成的新标题一定一定不能包含以下任何一个或多个风险词。"
+            "风险词:请注意, 分享, 听听, 看看, 全体, 一定, 所以人, 无数人, 值得一看, 值得一听, 99 %, 震撼, 必, 必看, 必听, 必读, 全场, 听听, 一起听听, 一起, 快看, 看看, 快来, 分享, 转发, 都看看吧, 都来, 注意, 最新, 紧急, 速看, 速转, 刚刚, 事关, 赶紧, 一定要, 千万不要, 震惊, 惊人, 亿万, 无数, 百分之, 自杀, 致死, 全体国民, 全体国人, 央视, 中央, 国务院, 人民日报, 卫生部, 官方, 气象局, 世卫, 联合国, 新闻, 内部, 内幕, 最新, 医生提醒, 爆炸性消息, 九胞胎, 天大的, 连看三遍, 务必看, 终于曝光, 神药, 危害太大, 不要吃了, 大事发生, 无数国人, 再忙也要, 出大事, 关系你我, 正式确认, 好消息, 突然传出, 新规出台, 重要的消息, 重要消息, 即将失传, 打死都, 惊天, 不要再吃, 格外留心, 太危险, 可怕一幕, 身亡, 后果很严重, 寿命长短, 错过别后悔, 必看, 早点知道就好了, 不得不信, 看一次少一次, 无数人, 老美, 新华社, 新规, 最新骗局, 新型骗局, 吃的是这些, 大老虎, 官员财产, 老中医, 预言, 致命, 救命, 保命, 非常难得, 太震撼了, 快来看, 一定要看, 来看看, 所有人都, 头一次见, 新型"
+            "b.新标题字符不小于15个字,同时不超过30个字。"
+            "c.新标题最前面或最后面必须加上emoij符号。如“🔴”、“⭕️”、“🚩”、“🔥”、“💖”"
+            "d.新标题只去掉原标题里的低质词,但语句、语意都和原标题保持不变。"
+            "e.去掉低质词后,根据语意适当加字句,使新标题整句读起来简洁、通顺、有吸引力、并准确反映视频核心内容。但一定不能包含任何一个或多个风险词。"
+
+            "视频的原标题:“哇!好美的一个视频,发给您也看看!”、“晚上好,这也太美啦,发给大家一起欣赏欣赏。”、“____这段话说得真好,一起听听!每句话都很有道快分享给群友看看吧!”、“👈这段话说的真好,值得一听”、“🔴世界顶尖雪雕❗ 太真实了,太美了!忍不住发给你看看!”、“💖《等》说得真好,看看吧...”、“🔴这样的萌娃你们喜欢吗,都看看吧!”、“🔴2025金蛇纳福,这首歌送给全体群友,祝大家财运亨通永不断!”、“🔴元旦青蛇遇双春,这三件事千万别做,都看看吧!”、“💕呵呵太搞笑了!老师和家长的对话!值得一看!绝了!”、“❤️《中国知识大全》太珍贵了!值得我们每个中国人都看看!”、“六岁小女孩一首《爸》全场泪奔”、“🔴酒店招牌菜,菠菜炒鸡蛋的家常做法,快来学学!”、“这个视频,分享给我的老友,祝愿您能幸福安康”"
+
+            "请务必严格遵守上述生成规则,为原标题生成对应的新标题。"
+            f"请分析该标题,标题为:{title},返回新的标题。"
+            ),
+            "responseFormat": {
+                "type": "json_schema",
+                "json_schema": {
+                    "strict": True,
+                    "name": "share_script_result",
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "新标题": {
+                                "type": "string",
+                                "description": "生成新的标题"
+                            }
+                        },
+                        "required": ["新标题"],
+                        "additionalProperties": False
+                    }
+                }
+            }
+        })
+        headers = {'Content-Type': 'application/json'}
+        try:
+            response = requests.post(url, headers=headers, data=payload)
+            response_data = response.json()
+
+            data = json.loads(response_data.get('data', '{}'))
+            new_title = data["新标题"]
+            return new_title
+        except Exception as e:
+            return None
+
+if __name__ == '__main__':
+    title = GPT4oMini.get_ai_mini_title("🔴这位美女说的太好了!这就是我们的大中国")
+    print(title)

+ 2 - 0
application/common/log/__init__.py

@@ -0,0 +1,2 @@
+from .local_log import Local
+from .aliyun_log import AliyunLogger

+ 81 - 0
application/common/log/aliyun_log.py

@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+# @Author: 罗俊辉
+# @Time: 2023/12/18
+"""
+公共方法,包含:生成log
+"""
+import json
+from aliyun.log import LogClient, PutLogsRequest, LogItem
+import time
+
+proxies = {"http": None, "https": None}
+
+
+class AliyunLogger(object):
+    """
+    阿里云日志方法
+    """
+    def __init__(self, platform, mode, env="prod"):
+        self.platform = platform
+        self.mode = mode
+        self.env = env
+
+    # 写入阿里云日志
+    def logging(
+            self, code, message, data=None, trace_id=None, account=None
+    ):
+        """
+        写入阿里云日志
+        测试库: https://sls.console.aliyun.com/lognext/project/crawler-log-dev/logsearch/crawler-log-dev
+        正式库: https://sls.console.aliyun.com/lognext/project/crawler-log-prod/logsearch/crawler-log-prod
+        """
+        # 设置阿里云日志服务的访问信息
+        if data is None:
+            data = {}
+        accessKeyId = "LTAIWYUujJAm7CbH"
+        accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P"
+        if self.env == "dev":
+            project = "crawler-log-dev"
+            logstore = "crawler-log-dev"
+            endpoint = "cn-hangzhou.log.aliyuncs.com"
+        else:
+            project = "crawler-log-prod"
+            logstore = "crawler-fetch"
+            endpoint = "cn-hangzhou.log.aliyuncs.com"
+
+        # 创建 LogClient 实例
+        client = LogClient(endpoint, accessKeyId, accessKey)
+        log_group = []
+        log_item = LogItem()
+
+        """
+        生成日志消息体格式,例如
+        crawler:xigua
+        message:不满足抓取规则 
+        mode:search
+        timestamp:1686656143
+        """
+        message = message.replace("\r", " ").replace("\n", " ")
+        contents = [
+            (f"TraceId", str(trace_id)),
+            (f"code", str(code)),
+            (f"platform", str(self.platform)),
+            (f"mode", str(self.mode)),
+            (f"message", str(message)),
+            (f"data", json.dumps(data, ensure_ascii=False) if data else ""),
+            (f"account", str(account)),
+            ("timestamp", str(int(time.time()))),
+        ]
+
+        log_item.set_contents(contents)
+        log_group.append(log_item)
+        # 写入日志
+        request = PutLogsRequest(
+            project=project,
+            logstore=logstore,
+            topic="",
+            source="",
+            logitems=log_group,
+            compress=False,
+        )
+        client.put_logs(request)
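A minimal usage sketch of the logger above; the platform, mode and field values are illustrative:

from application.common.log import AliyunLogger

logger = AliyunLogger(platform="xiaoniangao", mode="recommend", env="prod")
logger.logging(
    code="1002",
    message="成功抓取到一条视频",
    data={"out_video_id": "123456"},
    trace_id="xiaoniangao-recommend-demo-0001",
    account="demo_account",
)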

+ 54 - 0
application/common/log/local_log.py

@@ -0,0 +1,54 @@
+import sys
+from datetime import date, timedelta, datetime
+from loguru import logger
+from pathlib import Path
+
+class Local:
+    # 日期常量
+    now = datetime.now()
+    today = date.today()
+    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
+    tomorrow = (today + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    @staticmethod
+    def init_logger(platform: str, mode: str = "prod", log_level: str = "INFO", log_to_console: bool = False,
+                    rotation: str = "00:00", retention: str = "10 days"):
+        """
+        初始化日志记录器
+        :param platform: 平台名称,用于区分日志目录
+        :param mode: 运行环境(如 prod/test/dev)
+        :param log_level: 日志级别(如 INFO、DEBUG)
+        :param log_to_console: 是否同时输出到控制台
+        :param rotation: 日志文件切分策略(默认每天 00:00)
+        :param retention: 日志保留时间(默认10天)
+        """
+        # 创建日志目录
+        log_dir = Path(f"./log_store/{platform}")
+        log_dir.mkdir(parents=True, exist_ok=True)
+
+        # 设置日志文件名
+        log_filename = f"{platform}-{mode}-{Local.today.strftime('%Y-%m-%d')}.log"
+        log_file_path = log_dir / log_filename
+
+        # 清除默认 handler
+        logger.remove()
+
+        # 添加文件日志 handler
+        logger.add(
+            str(log_file_path),
+            level=log_level.upper(),
+            rotation=rotation,
+            retention=retention,
+            encoding="utf-8",
+            enqueue=True
+        )
+
+        # 可选:输出到控制台
+        if log_to_console:
+            logger.add(
+                sink=sys.stdout,
+                level=log_level.upper(),
+                format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level}</level> | {message}"
+            )
+
+        return logger
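Typical initialization, as a sketch (parameter values are illustrative):

from application.common.log import Local

logger = Local.init_logger(platform="xiaoniangao", mode="recommend",
                           log_level="INFO", log_to_console=True)
logger.info("crawler task started")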

+ 3 - 0
application/common/messageQueue/__init__.py

@@ -0,0 +1,3 @@
+from .mq import MQ
+from .ack_message import ack_message
+from .consumer import get_consumer

+ 15 - 0
application/common/messageQueue/ack_message.py

@@ -0,0 +1,15 @@
+from mq_http_sdk.mq_exception import MQExceptionBase
+
+from application.common.log import Local
+
+
+def ack_message(mode, platform, recv_msgs, consumer, trace_id=None):
+    """
+    消费成功后确认消息
+    """
+    try:
+        receipt_handle_list = [recv_msgs.receipt_handle]
+        consumer.ack_message(receipt_handle_list)
+        Local.init_logger(platform, mode).info(
+            f"[trace_id={trace_id}] Ack {len(receipt_handle_list)} Message Succeed."
+        )
+
+    except MQExceptionBase as err:
+        Local.init_logger(platform, mode).error(
+            f"[trace_id={trace_id}] Ack Message Fail! Exception:{err}"
+        )

+ 25 - 0
application/common/messageQueue/consumer.py

@@ -0,0 +1,25 @@
+from mq_http_sdk.mq_client import *
+
+
+def get_consumer(topic_name, group_id):
+    # 初始化client。
+    mq_client = MQClient(
+        # 设置HTTP协议客户端接入点,进入云消息队列 RocketMQ 版控制台实例详情页面的接入点区域查看。
+        "http://1894469520484605.mqrest.cn-qingdao-public.aliyuncs.com",
+        # AccessKey ID,阿里云身份验证标识。获取方式,请参见创建AccessKey。
+        "LTAI4G7puhXtLyHzHQpD6H7A",
+        # AccessKey Secret,阿里云身份验证密钥。获取方式,请参见创建AccessKey。
+        "nEbq3xWNQd1qLpdy2u71qFweHkZjSG",
+    )
+    # 消息所属的Topic,在云消息队列 RocketMQ 版控制台创建。
+    # topic_name = "${TOPIC}"
+    topic_name = str(topic_name)
+    # 您在云消息队列 RocketMQ 版控制台创建的Group ID。
+    # group_id = "${GROUP_ID}"
+    group_id = str(group_id)
+    # Topic所属的实例ID,在云消息队列 RocketMQ 版控制台创建。
+    # 若实例有命名空间,则实例ID必须传入;若实例无命名空间,则实例ID传入空字符串。实例的命名空间可以在云消息队列 RocketMQ 版控制台的实例详情页面查看。
+    instance_id = "MQ_INST_1894469520484605_BXhXuzkZ"
+
+    consumer = mq_client.get_consumer(instance_id, topic_name, group_id)
+    return consumer
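A sketch of a consume-and-ack loop built on the consumer above, assuming the Aliyun MQ HTTP SDK's `consume_message(batch_size, wait_seconds)` call; topic and group id values are placeholders. The SDK typically signals an empty poll by raising `MQExceptionBase`, which the loop simply skips:

from mq_http_sdk.mq_exception import MQExceptionBase

from application.common.messageQueue import get_consumer

consumer = get_consumer(topic_name="xiaoniangao_recommend", group_id="GID_xiaoniangao_recommend")
while True:
    try:
        recv_msgs = consumer.consume_message(1, 3)  # pop up to 1 message, long-poll for 3 seconds
    except MQExceptionBase:
        continue  # no message (or transient error): poll again
    for msg in recv_msgs:
        print(msg.message_body)
        consumer.ack_message([msg.receipt_handle])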

+ 51 - 0
application/common/messageQueue/mq.py

@@ -0,0 +1,51 @@
+import json
+from mq_http_sdk.mq_exception import MQExceptionBase
+from mq_http_sdk.mq_producer import TopicMessage
+from mq_http_sdk.mq_client import MQClient
+import traceback
+from application.common.log import Local
+from application.common.log import AliyunLogger
+
+
+class MQ(object):
+    """
+    MQ Class
+    """
+    instance_id = "MQ_INST_1894469520484605_BXhXuzkZ"
+
+    def __init__(self, topic_name) -> None:
+        self.mq_client = MQClient("http://1894469520484605.mqrest.cn-qingdao-public.aliyuncs.com",
+                                  "LTAI4G7puhXtLyHzHQpD6H7A",
+                                  "nEbq3xWNQd1qLpdy2u71qFweHkZjSG")
+        topic_name = topic_name+"_v2"
+        self.producer = self.mq_client.get_producer(self.instance_id, topic_name)
+
+    def send_msg(self, video_dict, max_retries = 3):
+        """
+        发送 mq,并且记录 redis
+        :param video_dict:
+        """
+        strategy = video_dict["strategy"]
+        platform = video_dict["platform"]
+        self.aliyun_log = AliyunLogger(mode=strategy, platform=platform)
+        for retry in range(max_retries):
+            try:
+                msg = TopicMessage(json.dumps(video_dict))
+                message_key = "{}-{}-{}".format(platform, strategy, video_dict['out_video_id'])
+                msg.set_message_key(message_key)
+                re_msg = self.producer.publish_message(msg)
+                Local.init_logger(platform,strategy).info("Publish Message Succeed. MessageID:%s, BodyMD5:%s\n" %
+                                                      (re_msg.message_id, re_msg.message_body_md5))
+                return
+            except MQExceptionBase as e:
+                tb = traceback.format_exc()
+                # 如果是最后一次重试失败,记录日志
+                if retry == max_retries - 1:
+                    Local.init_logger(platform, strategy).error(
+                        f"Publish Message Fail after {max_retries} attempts. Exception: {e}\n{tb}"
+                    )
+                    self.aliyun_log.logging(
+                        code="5005",
+                        message=f"Publish Message Fail after {max_retries} attempts. Exception: {e}",
+                        data= tb
+                    )

+ 1 - 0
application/common/mysql/__init__.py

@@ -0,0 +1 @@
+from .mysql_helper import MysqlHelper

+ 122 - 0
application/common/mysql/mysql_helper.py

@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# @Author: luojunhui
+# @Time: 2023/12/19
+"""
+数据库连接及操作
+"""
+import redis
+import pymysql
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+from application.common.log import Local
+from application.config.mysql_config import env_dict
+
+
+class MysqlHelper(object):
+    """
+    MySQL工具, env默认prod版本
+    """
+    def __init__(self, env="prod", mode='', platform='', action=''):
+        mysql_config = env_dict[env]
+        self.connection = pymysql.connect(
+            host=mysql_config['host'],  # 数据库IP地址,内网地址
+            port=mysql_config['port'],  # 端口号
+            user=mysql_config['user'],  # mysql用户名
+            passwd=mysql_config['passwd'],  # mysql用户登录密码
+            db=mysql_config['db'],  # 数据库名
+            charset=mysql_config['charset']  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+        )
+        self.mode = mode
+        self.platform = platform
+        self.action = action
+
+    def select(self, sql):
+        """
+        查询
+        :param sql:
+        :return:
+        """
+        cursor = self.connection.cursor()
+        cursor.execute(sql)
+        data = cursor.fetchall()
+        return data
+
+    def select_params(self, sql, params=None):
+        cursor = self.connection.cursor()
+        cursor.execute(sql, params or ())  # 支持参数化查询
+        data = cursor.fetchall()
+        return data
+
+    def update(self, sql):
+        """
+        更新 / 插入(执行写操作并提交,失败时回滚)
+        :param sql:
+        :return:
+        """
+        cursor = self.connection.cursor()
+        try:
+            res = cursor.execute(sql)
+            self.connection.commit()
+            return res
+        except Exception as e:
+            Local.init_logger(self.platform, self.mode).error(f"update_values异常,进行回滚操作:{e}\n")
+            self.connection.rollback()
+
+    def close(self):
+        """
+        关闭连接
+        """
+        self.connection.close()
+
+
+
+class RedisHelper:
+    @classmethod
+    def connect_redis(cls, env):
+        if env == 'hk':
+            redis_pool = redis.ConnectionPool(
+                # host='r-bp154bpw97gptefiqk.redis.rds.aliyuncs.com',  # 内网地址
+                # host='r-bp154bpw97gptefiqkpd.redis.rds.aliyuncs.com',  # 测试地址
+                host='r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com',  # 外网地址
+                port=6379,
+                db=2,
+                password='Wqsd@2019'
+            )
+            redis_conn = redis.Redis(connection_pool=redis_pool)
+        elif env == 'prod':
+            redis_pool = redis.ConnectionPool(
+                host='r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com',  # 内网地址
+                # host='r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com',  # 外网地址
+                port=6379,
+                db=2,
+                password='Wqsd@2019'
+            )
+            redis_conn = redis.Redis(connection_pool=redis_pool)
+        else:
+            redis_pool = redis.ConnectionPool(
+                # host='r-bp154bpw97gptefiqk.redis.rds.aliyuncs.com',  # 内网地址
+                host='r-bp154bpw97gptefiqkpd.redis.rds.aliyuncs.com',  # 外网地址
+                port=6379,
+                db=2,
+                password='Qingqu2019'
+            )
+            redis_conn = redis.Redis(connection_pool=redis_pool)
+        return redis_conn
+
+    @classmethod
+    def redis_push(cls, env, task_key, data):
+        redis_conn = cls.connect_redis(env)
+        # print("开始写入数据")
+        redis_conn.lpush(task_key, data)
+        # print("数据写入完成")
+
+    @classmethod
+    def redis_pop(cls, env, task_key):
+        redis_conn = cls.connect_redis(env)
+        if redis_conn.llen(task_key) == 0:
+            return None
+        else:
+            return redis_conn.rpop(task_key)
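A short sketch of the parameterized-query helper above; the `crawler_video` table name comes from other modules in this commit, and the values are illustrative:

from application.common.mysql import MysqlHelper

db = MysqlHelper(env="prod", mode="recommend", platform="xiaoniangao")
sql = "select count(*) from crawler_video where platform = %s and create_time >= CURDATE()"
rows = db.select_params(sql, ("xiaoniangao",))
print(rows[0][0])
db.close()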

+ 57 - 0
application/common/mysql/sql.py

@@ -0,0 +1,57 @@
+from datetime import datetime
+
+from application.common.mysql import MysqlHelper
+
+
+class Sql:
+
+    def update_name_url(self, mid, avatar_url, user_name):
+        """修改用户名 + 头像"""
+        sql = f""" update xng_uid set avatar_url = "{avatar_url}", user_name="{user_name}" where uid = "{mid}"; """
+        db = MysqlHelper()
+        repeat_video = db.update(sql=sql)
+        if repeat_video:
+            return True
+        return False
+
+    def insert_name_url(self, uid, avatar_url, user_name):
+        """插入 用户名 头像 用户id"""
+        current_time = datetime.now()
+        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+        insert_sql = f"""INSERT INTO xng_uid (uid, avatar_url, user_name, data_time) values ('{uid}' ,'{avatar_url}','{user_name}', '{formatted_time}')"""
+        db = MysqlHelper()
+        repeat_video = db.update(sql=insert_sql)
+        if repeat_video:
+            return True
+        return False
+
+    def select_id(self, uid):
+        """查询用户id是否存在"""
+        sql = f""" select uid from xng_uid where uid = "{uid}"; """
+        db = MysqlHelper()
+        repeat_video = db.select(sql=sql)
+        if repeat_video:
+            return True
+        return False
+
+    def select_id_status(self, uid):
+        """查询用户id是否之前已添加过"""
+        sql = f""" select uid from crawler_user_v3 where link = "{uid}"; """
+        db = MysqlHelper()
+        repeat_video = db.select(sql=sql)
+        if repeat_video:
+            return False
+        return True

+ 2 - 0
application/common/proxies/__init__.py

@@ -0,0 +1,2 @@
+from .fast_proxy import tunnel_proxies
+from .fast_proxy import haiwai_tunnel_proxies

+ 23 - 0
application/common/proxies/fast_proxy.py

@@ -0,0 +1,23 @@
+def tunnel_proxies():
+    # 隧道域名:端口号
+    tunnel = "q796.kdltps.com:15818"
+    # 用户名密码方式
+    username = "t17772369458618"
+    password = "5zqcjkmy"
+    proxies = {
+        "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
+        "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
+    }
+    return proxies
+
+
+def haiwai_tunnel_proxies():
+    tunnel = "c101.kdlfps.com:18866"
+    # 用户名密码方式
+    username = "f2801246645"
+    password = "q0i0ohnl"
+    proxies = {
+        "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
+        "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
+    }
+    return proxies
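A usage sketch: pass the returned mapping straight to `requests` (the target URL is only for demonstration):

import requests

from application.common.proxies import tunnel_proxies

resp = requests.get("https://httpbin.org/ip", proxies=tunnel_proxies(), timeout=10)
print(resp.text)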

+ 0 - 0
application/common/redis/__init__.py


+ 55 - 0
application/common/redis/pyredis.py

@@ -0,0 +1,55 @@
+"""
+Redis client Python
+@author luojunhui
+"""
+import redis
+
+
+class RedisClient(object):
+    """
+    Redis client by python
+    Todo 如果 Redis 服务挂了,怎么做能够不影响业务
+    思路, 每次使用 redis 接口前先判断是否连接成功,如果连接失败则跳过 redis ,不影响全局
+    """
+
+    def __init__(self):
+        self.pool = None
+        # self.host = 'r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com'
+        self.host="r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com",  # 外网地址
+        self.port = 6379
+        self.db = 2
+        self.password = 'Wqsd@2019'
+
+    def connect(self):
+        """
+        connect to redis server
+        :return: bool
+        """
+        try:
+            self.pool = redis.Redis(host=self.host, port=self.port, db=self.db, password=self.password)
+            return True
+        except Exception as e:
+            print("connect to redis fail, the reason is {}".format(e))
+            return False
+
+    def select(self, key):
+        """
+        read info from redis
+        :return:
+        """
+        return self.pool.get(key)
+
+    def insert(self, key, value, expire_time):
+        """
+        insert info from redis
+        :return:
+        """
+        self.pool.set(key, value, expire_time)
+
+    def delete(self, key):
+        """
+        delete key
+        :param key:
+        :return:
+        """
+        self.pool.delete(key)
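A sketch of the connect-before-use pattern suggested by the Todo in the docstring above: if `connect()` fails, skip the Redis logic instead of letting the crawler crash. The key name and expiry below are illustrative:

from application.common.redis.pyredis import RedisClient

client = RedisClient()
if client.connect():
    client.insert("crawler:demo:key", "1", 3600)   # value "1", 1-hour expiry
    print(client.select("crawler:demo:key"))
    client.delete("crawler:demo:key")
else:
    print("redis unavailable, skip dedup logic")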

+ 67 - 0
application/common/redis/redis_helper.py

@@ -0,0 +1,67 @@
+import redis
+from datetime import timedelta
+
+
+class SyncRedisHelper:
+    _pool: redis.ConnectionPool = None
+    _instance = None
+
+    def __init__(self):
+        if not self._instance:
+            self._pool = self._get_pool()
+            self._instance = self
+
+    def _get_pool(self) -> redis.ConnectionPool:
+        if self._pool is None:
+            self._pool = redis.ConnectionPool(
+                host="r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com",  # 内网地址
+                # host="r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com",  # 外网地址
+                port=6379,
+                db=2,
+                password="Wqsd@2019",
+                # password="Qingqu2019",
+
+            )
+        return self._pool
+
+    def get_client(self) -> redis.Redis:
+        pool = self._get_pool()
+        client = redis.Redis(connection_pool=pool)
+        return client
+
+    def close(self):
+        if self._pool:
+            self._pool.disconnect(inuse_connections=True)
+
+
+def store_data(platform, out_video_id, condition, day_time):
+    key = f"crawler:duplicate:{platform}:{out_video_id}"
+    value = 1
+    if condition:
+        timeout = timedelta(days=int(day_time))
+    else:
+        timeout = timedelta(hours=int(day_time))
+    helper = SyncRedisHelper()
+    client = helper.get_client()
+
+    client.set(key, value)
+    client.expire(key, timeout)
+
+
+def get_data(platform, out_video_id):
+    key = f"crawler:duplicate:{platform}:{out_video_id}"
+    helper = SyncRedisHelper()
+    client = helper.get_client()
+    value = client.exists(key)
+    return value
+
+
+# 示例:存储一个数据
+# store_data('xiaoniangao', '123457', True, 60)
+
+# # 示例:获取一个数据
+# value = get_data('xiaoniangao', '1234857')
+# if value is None:
+#     print("Value does not exist")
+# else:
+#     print(f"Retrieved value: {value}")

+ 54 - 0
application/common/redis/xng_redis.py

@@ -0,0 +1,54 @@
+import json
+
+import redis
+
+
+
+class XNGSyncRedisHelper:
+    _pool: redis.ConnectionPool = None
+    _instance = None
+
+    def __init__(self):
+        if not self._instance:
+            self._pool = self._get_pool()
+            self._instance = self
+
+    def _get_pool(self) -> redis.ConnectionPool:
+        if self._pool is None:
+            self._pool = redis.ConnectionPool(
+                # host="r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com",  # 外网地址
+                host="r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com",  # 内网地址
+                port=6379,
+                db=0,
+                password="Wqsd@2019",
+                # password="Qingqu2019",
+
+            )
+        return self._pool
+
+    def get_client(self) -> redis.Redis:
+        pool = self._get_pool()
+        client = redis.Redis(connection_pool=pool)
+        return client
+
+    def close(self):
+        if self._pool:
+            self._pool.disconnect(inuse_connections=True)
+
+
+
+
+def xng_get_video_data():
+    """获取一条id"""
+    task = f"task:xng_video_id"
+    helper = XNGSyncRedisHelper()
+    client = helper.get_client()
+    ret = client.rpop(task)
+    return ret
+
+def xng_in_video_data(ret):
+    """写入"""
+    task = f"task:xng_video_id"
+    helper = XNGSyncRedisHelper()
+    client = helper.get_client()
+    client.rpush(task, ret)

+ 3 - 0
application/config/__init__.py

@@ -0,0 +1,3 @@
+from .ipconfig import ip_config
+from .mysql_config import env_dict
+from .topic_group_queue import TopicGroup

+ 0 - 0
application/config/aliyun_config.py


+ 7 - 0
application/config/config.py

@@ -0,0 +1,7 @@
+# api 配置
+crawler_api_domain = 'http://8.217.192.46:8889'
+zhufuquanzi_view_api = crawler_api_domain + '/crawler/zhu_fu_quan_zi/detail_exposure'
+zhufuquanzi_history_api = crawler_api_domain + '/crawler/zhu_fu_quan_zi/detail_history'
+xiaoniangao_view_api = crawler_api_domain + '/crawler/xiao_nian_gao_plus/detail_exposure'
+xiaoniangao_history_api = crawler_api_domain + '/crawler/xiao_nian_gao_plus/detail_history'
+zhufuquanzi_log_upload_api = crawler_api_domain + '/crawler/zhu_fu_quan_zi/log_upload'

+ 30 - 0
application/config/ipconfig.py

@@ -0,0 +1,30 @@
+"""
+ipconfig
+每一个容器和手机需要在同一个局域网,保证容器内appium和手机的网络通畅
+"""
+
+
+def ip_config():
+    ip_dict = {
+        "machine_01": "",
+        "machine_02": "",
+        "machine_03": "",
+        "machine_04": "",
+        "machine_05": "",
+        "machine_06": "",
+        "machine_07": "",
+        "machine_08": "",
+        "machine_09": "",
+        "machine_10": "",
+        "machine_11": "",
+        "machine_12": "",
+        "machine_13": "",
+        "machine_14": "",
+        "machine_15": "",
+        "machine_16": "",
+        "machine_17": "",
+        "machine_18": "",
+        "machine_19": "",
+        "machine_20": ""
+    }
+    return ip_dict

+ 36 - 0
application/config/mysql_config.py

@@ -0,0 +1,36 @@
+"""
+MySQL的配置任务
+"""
+
+
+# 香港服务器, 暂时不写
+mysql_hk = {}
+
+# prod环境服务器地址
+mysql_prod = {
+    "host": "rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+    # host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",# 数据库IP地址,外网地址
+    "port": 3306,  # 端口号
+    "user":"crawler",  # mysql用户名
+    "passwd": "crawler123456@",  # mysql用户登录密码
+    "db": "piaoquan-crawler",  # 数据库名
+    "charset": "utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+}
+# 测试环境Mysql服务器地址
+mysql_dev = {
+    "host": "rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+    # host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # 数据库IP地址,外网地址
+    "port": 3306,  # 端口号
+    "user":"crawler",  # mysql用户名
+    "passwd": "crawler123456@",  # mysql用户登录密码
+    "db": "piaoquan-crawler",  # 数据库名
+    "charset": "utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+}
+
+env_dict = {
+    "hk": mysql_hk,
+    "prod": mysql_prod,
+    "dev": mysql_dev
+}

+ 24 - 0
application/config/topic_group_queue.py

@@ -0,0 +1,24 @@
+import yaml
+from utils.project_paths import config_dir
+
+class TopicGroup:
+    def __init__(self, config_path=f"{config_dir}/topic_map.yaml"):
+        with open(config_path, "r") as f:
+            data = yaml.safe_load(f)
+            self.topics = data.get("topics", [])  # 直接获取 topic 列表
+
+    def __iter__(self):
+        """支持迭代遍历 topics"""
+        return iter(self.topics)
+
+    def __len__(self):
+        return len(self.topics)
+
+    def __str__(self):
+        return str(self.topics)
+
+
+if __name__ == '__main__':
+    tg = TopicGroup()
+    print(tg)
+

+ 0 - 0
application/etl/__init__.py


+ 134 - 0
application/etl/download.py

@@ -0,0 +1,134 @@
+"""
+下载视频
+"""
+import os
+import json
+import time
+import asyncio
+from hashlib import md5
+import datetime
+
+import httpx
+import requests
+
+
+class VideoDownloader(object):
+    """
+    视频下载功能
+    """
+
+    def __init__(self, video_obj):
+        self.platform = video_obj['platform']
+        self.video_id = video_obj['video_id']
+        self.video_url = video_obj['video_url']
+        self.cover_url = video_obj['cover_url']
+        self.proxy = {
+            "http://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/",
+            "https://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/"
+        }
+        self.max_retry = 5
+
+    def generate_video_path(self):
+        """
+        通过视频信息生成唯一视频地址
+        :return:
+        """
+        index = "{}-{}".format(self.platform, self.video_id)
+        index = md5(index.encode()).hexdigest()
+        temp_dir = "/Users/luojunhui/cyber/automatic_crawler"
+        file_name = "{}.mp4".format(index)
+        date_info = datetime.datetime.today().strftime("%Y%m%d")
+        video_path = os.path.join(temp_dir, date_info, file_name)
+        if os.path.exists(video_path):
+            return
+        else:
+            os.makedirs(os.path.dirname(video_path), exist_ok=True)
+        return video_path
+
+    async def download_video(self):
+        """
+        download video from the web
+        :return:
+        """
+        if self.platform == "fuqiwang":
+            download_path = self.generate_video_path()
+            if download_path:
+                headers = {
+                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
+                    'Accept-Encoding': 'identity;q=1, *;q=0',
+                    'Accept': '*/*',
+                    'Sec-Fetch-Site': 'cross-site',
+                    'Sec-Fetch-Mode': 'no-cors',
+                    'Sec-Fetch-Dest': 'video',
+                    'Referer': 'https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html',
+                    'Accept-Language': 'en-US,en;q=0.9',
+                    'Range': 'bytes=0-',
+                }
+                async with httpx.AsyncClient(http2=True, proxies=self.proxy, headers=headers) as client:
+                    try:
+                        response = await client.get(self.video_url, headers=headers)
+                        if response.status_code == 206:
+                            with open(download_path, "wb") as f:
+                                f.write(response.content)
+                        else:
+                            for _ in range(self.max_retry):
+                                response = await client.get(self.video_url, headers=headers, follow_redirects=True)
+                                if response.status_code == 206:
+                                    with open(download_path, "wb") as f:
+                                        f.write(response.content)
+                                    break
+                    except httpx.HTTPError as e:
+                        print(f"An error occurred while downloading: {e}")
+            else:
+                print("视频已经存在")
+
+    def get_by_request(self):
+        """
+        req
+        :return:
+        """
+        download_path = self.generate_video_path()
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
+            'Accept-Encoding': 'identity;q=1, *;q=0',
+            'Accept': '*/*',
+            'Sec-Fetch-Site': 'cross-site',
+            'Sec-Fetch-Mode': 'no-cors',
+            'Sec-Fetch-Dest': 'video',
+            'Referer': 'https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Range': 'bytes=0-',
+        }
+        # self.proxy uses httpx-style "http://"/"https://" keys; requests expects "http"/"https",
+        # so convert the keys here, otherwise the proxy would silently be ignored
+        request_proxies = {scheme.rstrip(":/"): value for scheme, value in self.proxy.items()}
+        r = requests.get(
+            url=self.video_url,
+            headers=headers,
+            proxies=request_proxies
+        )
+        print(r.status_code)
+        with open("test.mp4", "wb") as f:
+            f.write(r.content)
+
+
+async def main(video_obj):
+    """
+    异步执行函数
+    :param video_obj:
+    :return:
+    """
+    downloader = VideoDownloader(video_obj)
+    await downloader.download_video()
+
+
+if __name__ == '__main__':
+    video_o = {
+        "update_time": 1709784300,
+        "platform": "fuqiwang",
+        "video_id": 142599,
+        "title": "🔴3·8妇女节,最美的祝福,送给全天下的女神!",
+        "type": 1,
+        "video_type": 2,
+        "cover_url": "https://znl-video-bos.cdn.bcebos.com/c6f12b49992ef638342065439f55b444/65e93632/picture/20240306/b8b0c1cc262c2394f111650c9f82e35a_thumb.jpg",
+        "video_url": "https://znl-video-bos.cdn.bcebos.com/e368801a814c548e443835086d37caaf/65e93632/video/20240306/820ee1498e3ed2a59d37aed54d39ae95_1.mp4",
+    }
+    VideoDownloader(video_obj=video_o).get_by_request()
+    # asyncio.run(main(video_obj=video_o))

+ 3 - 0
application/functions/__init__.py

@@ -0,0 +1,3 @@
+from .get_redirect_url import get_redirect_url
+from .clean_title import clean_title
+from .read_mysql_config import get_config_from_mysql

+ 26 - 0
application/functions/appium_tools.py

@@ -0,0 +1,26 @@
+"""
+Appium 的一些公共方法
+"""
+import time
+from selenium.webdriver.common.by import By
+from selenium.common.exceptions import NoSuchElementException
+
+
+def search_elements(driver, xpath):
+    """
+    获取元素
+    :param driver:
+    :param xpath:
+    :return:
+    """
+    time.sleep(1)
+    windowHandles = driver.window_handles
+    for handle in windowHandles:
+        driver.switch_to.window(handle)
+        time.sleep(1)
+        try:
+            elements = driver.find_elements(By.XPATH, xpath)
+            if elements:
+                return elements
+        except NoSuchElementException:
+            pass

+ 22 - 0
application/functions/clean_title.py

@@ -0,0 +1,22 @@
+def clean_title(strings):
+    return (
+        strings.strip()
+        .replace("\n", "")
+        .replace("/", "")
+        .replace("\r", "")
+        .replace("#", "")
+        .replace(".", "。")
+        .replace("\\", "")
+        .replace("&NBSP", "")
+        .replace(":", "")
+        .replace("*", "")
+        .replace("?", "")
+        .replace("?", "")
+        .replace('"', "")
+        .replace("<", "")
+        .replace(">", "")
+        .replace("|", "")
+        .replace(" ", "")
+        .replace('"', "")
+        .replace("'", "")
+    )

+ 3 - 0
application/functions/crypt.py

@@ -0,0 +1,3 @@
+"""
+爬虫逆向加密算法
+"""

+ 9 - 0
application/functions/get_redirect_url.py

@@ -0,0 +1,9 @@
+import requests
+
+
+def get_redirect_url(url):
+    res = requests.get(url, allow_redirects=False)
+    if res.status_code == 302 or res.status_code == 301:
+        return res.headers['Location']
+    else:
+        return url

+ 46 - 0
application/functions/read_mysql_config.py

@@ -0,0 +1,46 @@
+import json
+
+from application.common.mysql import MysqlHelper
+
+
+def get_config_from_mysql(log_type, source, text):
+    """
+    :param log_type: mode
+    :param source: platform
+    :param text:
+    :return:
+    """
+    select_sql = f"""select config from crawler_config where source="{source}" """
+    MySQL = MysqlHelper(mode=log_type, platform=source)
+    configs = MySQL.select(select_sql)
+    title_list = []
+    filter_list = []
+    emoji_list = []
+    search_word_list = []
+    for config in configs:
+        config_dict = json.loads(config[0])
+        for k, v in config_dict.items():
+            if k == "title":
+                title_list_config = v.split(",")
+                for title in title_list_config:
+                    title_list.append(title)
+            if k == "filter":
+                filter_list_config = v.split(",")
+                for filter_word in filter_list_config:
+                    filter_list.append(filter_word)
+            if k == "emoji":
+                emoji_list_config = v.split(",")
+                for emoji in emoji_list_config:
+                    emoji_list.append(emoji)
+            if k == "search_word":
+                search_word_list_config = v.split(",")
+                for search_word in search_word_list_config:
+                    search_word_list.append(search_word)
+    if text == "title":
+        return title_list
+    elif text == "filter":
+        return filter_list
+    elif text == "emoji":
+        return emoji_list
+    elif text == "search_word":
+        return search_word_list
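A usage sketch; the `source` value is illustrative and must match a row in `crawler_config`:

from application.functions import get_config_from_mysql

filter_words = get_config_from_mysql(log_type="recommend", source="xiaoniangao", text="filter")
print(filter_words)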

+ 240 - 0
application/functions/zqkd_db_redis.py

@@ -0,0 +1,240 @@
+import os
+import sys
+import threading
+import traceback
+from datetime import datetime, timedelta
+
+import redis
+
+from application.common import Local
+
+sys.path.append(os.getcwd())
+
+from application.common.mysql import MysqlHelper
+
+
+class DatabaseOperations:
+    def __init__(self, mode, platform):
+        self.mysql = MysqlHelper(mode=mode, platform=platform)
+        self.LocalLog = Local.init_logger(platform, mode)
+        self.mode = mode
+        self.platform = platform
+
+    def check_user_id(self, uid):
+        """
+        检查指定用户ID是否存在于数据库的 zqkd_user 表中。
+
+        :param uid:要检查的用户ID
+        :return:如果用户ID存在于表中返回True,否则返回False
+        """
+        try:
+            query_sql = f""" SELECT uid FROM zqkd_user WHERE uid = "{uid}"; """
+            result = self.mysql.select(sql=query_sql)
+            return bool(result)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"检查用户ID失败: {e}\n{tb}")
+            return False
+
+    def update_user(self, uid, user_name, avatar_url):
+        """
+        更新数据库中指定用户的用户名和头像URL。
+
+        :param uid:要更新信息的用户ID
+        :param user_name:新的用户名
+        :param avatar_url:新的头像URL
+        :return:如果更新操作成功,返回更新操作的结果(通常是影响的行数),失败则返回None或抛出异常
+        """
+        try:
+            update_sql = f""" UPDATE zqkd_user SET avatar_url = "{avatar_url}", user_name = "{user_name}" WHERE uid = "{uid}"; """
+            return self.mysql.update(sql=update_sql)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"更新用户信息失败: {e}\n{tb}")
+            return None
+
+    def insert_user(self, uid, user_name, avatar_url):
+        """
+        向数据库的zqkd_user表中插入或更新用户信息
+
+        :param uid: 用户ID(数值类型)
+        :param user_name: 用户名
+        :param avatar_url: 头像URL
+        :return: 成功返回影响的行数,失败返回None
+        """
+        try:
+            # 直接拼接SQL(不推荐,有SQL注入风险)
+            insert_sql = f"""
+                INSERT INTO zqkd_user (uid, avatar_url, user_name) 
+                VALUES ({uid}, '{avatar_url.replace("'", "''")}', '{user_name.replace("'", "''")}') 
+                ON DUPLICATE KEY UPDATE 
+                user_name = '{user_name.replace("'", "''")}', 
+                avatar_url = '{avatar_url.replace("'", "''")}'
+            """
+            return self.mysql.update(sql=insert_sql)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"插入用户信息失败: {e}\n{tb}")
+            return None
+
+    def get_today_videos(self):
+        """统计当前平台、当前策略当天入库的视频数量"""
+        try:
+            sql = """
+                        SELECT count(*) as cnt
+                        FROM crawler_video 
+                        WHERE create_time >= CURDATE() 
+                          AND create_time < CURDATE() + INTERVAL 1 DAY 
+                          AND platform = %s 
+                          AND strategy = %s
+                    """
+            result = self.mysql.select_params(sql, (self.platform,self.mode))
+            if result and len(result) > 0:
+                return result[0][0]  # 返回第一行第一列的计数值
+            return 0  # 无结果时返回0
+        except Exception as e:
+            self.LocalLog.error(f"查询失败: {e}")
+            return 0
+
+    def select_user(self, last_scanned_id=0):
+        """
+        根据last_scanned_id查询用户数据
+        :param last_scanned_id: 上次扫描的ID,0表示从头开始
+        :return: 查询结果列表
+        """
+        try:
+            # 构建查询(根据last_scanned_id过滤)
+            query = "SELECT id, uid FROM zqkd_user"
+            if last_scanned_id > 0:
+                query += f" WHERE id > {last_scanned_id}"
+            query += " ORDER BY id ASC"
+
+            return self.mysql.select(query)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"查询用户列表失败: {e}\n{tb}")
+            return []
+
+
+class RedisOperations:
+    _pool: redis.ConnectionPool = None
+    _instance = None
+    _lock = threading.Lock()  # 用于线程安全的单例创建
+
+    @classmethod
+    def get_instance(cls, mode="", platform=""):
+        """线程安全的单例获取方法"""
+        if not cls._instance:
+            with cls._lock:
+                if not cls._instance:
+                    cls._instance = cls(mode, platform)
+        return cls._instance
+
+    def __init__(self, mode, platform):
+        # 私有构造函数,使用 get_instance() 获取实例
+        self.mode = mode
+        self.platform = platform
+        self.LocalLog = Local.init_logger(self.platform, self.mode)
+        if RedisOperations._instance is not None:
+            raise Exception("请使用 get_instance() 获取实例")
+
+        self._pool = self._get_pool()
+        self.client = redis.Redis(connection_pool=self._pool, decode_responses=True)  # 复用同一个客户端
+
+    def _get_pool(self) -> redis.ConnectionPool:
+        if self._pool is None:
+            try:
+                self._pool = redis.ConnectionPool(
+                    host="r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com",
+                    port=6379,
+                    db=0,
+                    password="Wqsd@2019",
+                    max_connections=50,  # 增加最大连接数
+                    socket_timeout=10,
+                    retry_on_timeout=True
+                )
+            except Exception as e:
+                tb = traceback.format_exc()
+                self.LocalLog.error(f"创建Redis连接池失败: {e}\n{tb}")
+                raise
+        return self._pool
+
+    def close(self):
+        """关闭连接池"""
+        try:
+            if self._pool:
+                self._pool.disconnect(inuse_connections=True)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"关闭Redis连接池失败: {e}\n{tb}")
+
+    def get_recommend_video(self, task="task:zqkd_video_id"):
+        """从Redis的指定列表中弹出并返回最左边的视频ID"""
+        try:
+            value_bytes = self.client.rpop(task)
+            value_str = value_bytes.decode('utf-8')
+            return value_str
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"获取推荐视频ID失败: {e}\n{tb}")
+            return None
+
+    def check_video_id_exists(self, videoID):
+        """检查指定的视频ID是否已经存在于Redis中"""
+        try:
+            key = f"crawler:zqkd:{videoID}"
+            return self.client.exists(key)
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"检查视频ID是否存在失败: {e}\n{tb}")
+            return False
+
+    def save_video_id(self, videoID):
+        """将视频ID存储到Redis中,并为其设置3天的过期时间"""
+        try:
+            key = f"crawler:zqkd:{videoID}"
+            expiration_time = int(timedelta(days=3).total_seconds())
+            self.client.setex(key, expiration_time, "1")
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"保存视频ID失败: {e}\n{tb}")
+
+    def save_recommend_video(self, videoID):
+        """将推荐视频ID添加到Redis的指定列表中,并为该列表设置2天的过期时间"""
+        try:
+            task = "task:zqkd_video_id"
+            pipe = self.client.pipeline()  # 使用管道执行多个命令
+            pipe.rpush(task, videoID)
+            pipe.expire(task, int(timedelta(days=2).total_seconds()))
+            pipe.execute()
+
+            # 检查数据是否写入成功
+            list_length = self.client.llen(task)
+            self.LocalLog.info(f"保存推荐视频ID成功,列表长度: {list_length}")
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"保存推荐视频ID失败: {e}\n{tb}")
+
+    def get_last_scanned_id(self):
+        """获取上次扫描的ID"""
+        try:
+            return self.client.get("zqkd_last_scanned_id").decode('utf-8')
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"获取上次扫描的ID失败: {e}\n{tb}")
+            return None
+
+    def set_last_scanned_id(self, last_scanned_id):
+        """设置上次扫描的ID"""
+        try:
+            result = self.client.set("zqkd_last_scanned_id", last_scanned_id)
+            if result:
+                self.LocalLog.info(f"成功设置上次扫描的ID: {last_scanned_id}")
+        except Exception as e:
+            tb = traceback.format_exc()
+            self.LocalLog.error(f"设置上次扫描的ID失败: {e}\n{tb}")
+            return False
+
+
+if __name__ == '__main__':
+    db = DatabaseOperations("author", "zhongqingkandianauthor")
+    print(db.get_today_videos())

+ 1 - 0
application/items/__init__.py

@@ -0,0 +1 @@
+from .item import VideoItem

+ 94 - 0
application/items/item.py

@@ -0,0 +1,94 @@
+import time
+from application.functions import clean_title
+
+
+class VideoItem(object):
+    """
+    function: 当扫描进一条视频的时候,对该视频的基本信息进行处理,保证发送给 pipeline和 etl 的 video_dict 是正确的
+    __init__: 初始化空json 对象,用来存储视频信息
+    add_video_info: 把视频信息存储到 item 对象中
+    check_item: 检查 item 对象中的各个元素以及处理
+    """
+
+    def __init__(self):
+        self.item = {}
+
+    def add_video_info(self, key, value):
+        self.item[key] = value
+
+    def check_item(self):
+        """
+        判断item 里面的字段,是否符合要求
+        字段分为 3 类:
+        1. 必须存在数据的字段: ["video_id", "user_id", "user_name", "out_user_id", "out_video_id", "session", "video_url", "cover_url", "platform", "strategy"]
+        2. 不存在默认为 0 的字段 :["duration", "play_cnt", "like_cnt", "comment_cnt", "share_cnt", "width", "height"]
+        3. 需要后处理的字段: video_title, publish_time
+        """
+        if self.item.get("video_title"):
+            self.item["video_title"] = clean_title(self.item["video_title"])
+        else:
+            return False
+        if self.item.get("publish_time_stamp"):
+            publish_time_str = time.strftime(
+                "%Y-%m-%d %H:%M:%S", time.localtime(self.item["publish_time_stamp"])
+            )
+            self.add_video_info("publish_time_str", publish_time_str)
+        else:
+            publish_time_stamp = int(time.time())
+            publish_time_str = time.strftime(
+                "%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)
+            )
+            self.add_video_info("publish_time_stamp", publish_time_stamp)
+            self.add_video_info("publish_time_str", publish_time_str)
+        self.add_video_info("publish_time", publish_time_str)
+        if not self.item.get("update_time_stamp"):
+            self.add_video_info("update_time_stamp", int(time.time()))
+
+        # 如果不存在,默认值为 0
+        config_keys = [
+            "duration",
+            "play_cnt",
+            "like_cnt",
+            "comment_cnt",
+            "share_cnt",
+            "width",
+            "height",
+        ]
+        for config_key in config_keys:
+            if self.item.get(config_key):
+                continue
+            else:
+                self.add_video_info(config_key, 0)
+
+        # 必须存在的元素,若不存在则会报错
+        must_keys = [
+            "video_id",
+            "user_id",
+            "user_name",
+            "out_video_id",
+            "session",
+            "video_url",
+            "cover_url",
+            "platform",
+            "strategy",
+        ]
+        """
+        video_id, out_video_id 均为站外视频 id
+        user_id: 站内用户 id
+        out_user_id: 站外用户 id
+        user_name: 站外用户名称
+        """
+        for m_key in must_keys:
+            if self.item.get(m_key):
+                continue
+            else:
+                # print(m_key)
+                return False
+        return True
+
+    def produce_item(self):
+        flag = self.check_item()
+        if flag:
+            return self.item
+        else:
+            return False
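A sketch of how a spider would assemble an item with the mandatory fields listed in `check_item`; all values are illustrative. `produce_item()` returns the dict on success and `False` when a required field is missing:

from application.items import VideoItem

item = VideoItem()
item.add_video_info("video_title", "示例标题")
item.add_video_info("video_id", "123456")
item.add_video_info("out_video_id", "123456")
item.add_video_info("user_id", "660001")
item.add_video_info("user_name", "demo_user")
item.add_video_info("session", "recommend-1700000000")
item.add_video_info("video_url", "https://example.com/demo.mp4")
item.add_video_info("cover_url", "https://example.com/demo.jpg")
item.add_video_info("platform", "xiaoniangao")
item.add_video_info("strategy", "recommend")
video_dict = item.produce_item()
print(video_dict if video_dict else "missing mandatory fields")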

+ 2 - 0
application/pipeline/__init__.py

@@ -0,0 +1,2 @@
+from .pipeline_dev import PiaoQuanPipelineTest
+from .pipeline import PiaoQuanPipeline

+ 273 - 0
application/pipeline/pipeline.py

@@ -0,0 +1,273 @@
+import hashlib
+import re
+import sys
+import os
+import time
+
+from application.common.feishu.feishu_utils import FeishuUtils
+
+sys.path.append(os.getcwd())
+from datetime import datetime
+
+from application.common import MysqlHelper, AliyunLogger
+from application.common.redis.pyredis import RedisClient
+
+
+class PiaoQuanPipeline(object):
+    """
+    爬虫管道——爬虫规则判断
+    """
+
+    def __init__(self, platform, mode, rule_dict, env, item, trace_id, account=None):
+        self.platform = platform
+        self.mode = mode
+        self.item = item
+        self.rule_dict = rule_dict
+        self.env = env
+        self.trace_id = trace_id
+        self.mysql = MysqlHelper(env=env, mode=mode, platform=platform)
+        self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
+        self.account = account
+        self.red = RedisClient()
+
+    def feishu_time_list(self):
+        summary = FeishuUtils.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "RuLK77")
+        for row in summary[1:]:
+            channel = row[0]
+            day_count = row[1]
+            if channel:
+                if channel == self.platform:
+                    return day_count
+            else:
+                return None
+        return None
+
+    def publish_time_flag(self):
+        """
+        判断发布时间是否过期
+        :return: True or False
+        """
+        # 判断发布时间
+        publish_time_stamp = self.item["publish_time_stamp"]
+        update_time_stamp = self.item["update_time_stamp"]
+        max_d = self.rule_dict.get("period", {}).get("max", 1000)
+        min_d = self.rule_dict.get("period", {}).get("min", 1000)
+        days = max_d if max_d > min_d else min_d
+        days_time = self.feishu_time_list()
+        if days_time:
+            days = int(days_time)
+        if self.platform == "gongzhonghao":
+            if (
+                    int(time.time()) - publish_time_stamp
+                    > 3600 * 24 * days
+            ) and (
+                    int(time.time()) - update_time_stamp
+                    > 3600 * 24 * days
+            ):
+                self.aliyun_log.logging(
+                    code="2004",
+                    trace_id=self.trace_id,
+                    data=self.item,
+                    message="发布时间超过{}天".format(days),
+                )
+                return False
+        else:
+            if days == 0:
+                # days 为 0 时只允许当天发布的视频通过
+                is_today = datetime.fromtimestamp(publish_time_stamp).date() == datetime.today().date()
+                if not is_today:
+                    return False
+
+            elif (
+                    int(time.time()) - publish_time_stamp
+                    > 3600 * 24 * days
+            ):
+                self.aliyun_log.logging(
+                    code="2004",
+                    trace_id=self.trace_id,
+                    data=self.item,
+                    message="发布时间超过{}天".format(days),
+                )
+                return False
+        return True
+
+    def title_flag(self):
+        """
+        视频标题是否满足需求
+        :return:
+        """
+        title = self.item["video_title"]
+        cleaned_title = re.sub(r"[^\w]", " ", title)
+        # 敏感词
+        # 获取敏感词列表
+        sensitive_words = []
+        if any(word in cleaned_title for word in sensitive_words):
+            self.aliyun_log.logging(
+                code="2003",
+                trace_id=self.trace_id,
+                message="标题中包含敏感词",
+                data=self.item,
+                account=self.account
+            )
+            return False
+        return True
+
+    def download_rule_flag(self):
+        """
+        视频基础下载规则
+        :return:
+        """
+        for key in self.item:
+            if self.rule_dict.get(key):
+                max_value = (
+                    int(self.rule_dict[key]["max"])
+                    if int(self.rule_dict[key]["max"]) > 0
+                    else 999999999999999
+                )
+                if key == "peroid":  # peroid是抓取周期天数
+                    continue
+                else:
+                    flag = int(self.rule_dict[key]["min"]) <= int(self.item[key]) <= max_value
+                    if not flag:
+                        self.aliyun_log.logging(
+                            code="2004",
+                            trace_id=self.trace_id,
+                            data=self.item,
+                            message="{}: {} <= {} <= {}, {}".format(
+                                key,
+                                self.rule_dict[key]["min"],
+                                self.item[key],
+                                max_value,
+                                flag,
+                            ),
+                            account=self.account
+                        )
+                        return flag
+            else:
+                continue
+        return True
+
+    def feishu_list(self):
+        summary = FeishuUtils.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "letS93")
+        for row in summary[1:]:
+            channel = row[0]
+            day_count = row[1]
+            if channel:
+                if channel == self.platform:
+                    return day_count
+            else:
+                return None
+        return None
+
+    # 按照某个具体平台来去重
+    def repeat_video(self):
+        """
+        视频是否重复
+        :return:
+        """
+        out_id = self.item["out_video_id"]
+        day_count = self.feishu_list()
+        if day_count:
+            sql_2 = f"""select create_time from crawler_video where platform = "{self.platform}" and  out_video_id="{out_id}" AND create_time >= DATE_SUB(NOW(), INTERVAL {int(day_count)} DAY);"""
+            repeat_video = self.mysql.select(sql=sql_2)
+            if repeat_video:
+                self.aliyun_log.logging(
+                    code="2002",
+                    trace_id=self.trace_id,
+                    message="重复的视频",
+                    data=self.item,
+                    account=self.account
+                )
+                return False
+            else:
+                return True
+
+        if self.platform == "zhufuniannianshunxinjixiang" or  self.platform == "weiquanshipin" or  self.platform == "piaoquangushi" or  self.platform == "lepaoledong" or  self.platform == "zhufukuaizhuan" or self.platform == "linglingkuailezhufu" or self.platform == "lepaoledongdijie":
+            return True
+        if self.platform == "jierizhufuhuakaifugui" or self.platform == "yuannifuqimanman" or self.platform == "haoyunzhufuduo" or self.platform == "quzhuan" or self.platform == "zhufudewenhou" or self.platform == "jierizhufuxingfujixiang" or self.platform == "haoyoushipin" or self.platform == "xinshiquan" or self.platform == "laonianshenghuokuaile" or self.platform == "laonianquan":
+            return True
+        if self.platform == "zhuwanwufusunew" and self.mode == "recommend":
+            return True
+        if self.platform == "jixiangxingfu" and self.mode == "recommend":
+            return True
+        if self.platform == "yuannifuqichangzai" and self.mode == "recommend":
+            return True
+        if self.platform == "benshanzhufu" and self.mode == "recommend":
+            return True
+        if self.platform == "zuihaodesongni" and self.mode == "recommend":
+            return True
+        if self.platform == "tiantianjufuqi" and self.mode == "recommend":
+            return True
+        # 判断加上标题去重
+        if self.mode == "recommend" and self.platform == "zhufuhaoyunbaofu":
+            title = self.item["video_title"]
+            sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}" and video_title="{title}"; """
+        else:
+            sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}"; """
+        repeat_video = self.mysql.select(sql=sql)
+        if repeat_video:
+            # 喜事多多平台 4 天去重一次
+            if self.platform == "xishiduoduo":
+                sql_2 = f"""select create_time from crawler_video where out_video_id="{out_id}";"""
+                video_time = self.mysql.select(sql=sql_2)[0][0].timestamp()
+                if int(time.time()) - video_time >= 86400 * 4:
+                    return True
+            # 小年糕推荐流和祝福圈子推荐流 3 天去重一次
+            elif self.platform == "xiaoniangaotuijianliu" or self.platform == "zhufuquanzituijianliu":
+                sql_2 = f"""select create_time from crawler_video where out_video_id="{out_id}";"""
+                video_time = self.mysql.select(sql=sql_2)[0][0].timestamp()
+                if int(time.time()) - video_time >= 86400 * 3:
+                    return True
+            self.aliyun_log.logging(
+                code="2002",
+                trace_id=self.trace_id,
+                message="重复的视频",
+                data=self.item,
+                account=self.account
+            )
+            return False
+        return True
+
+    # def mq_exists(self):
+    #     """
+    #     检测 mq 是否已经发送过了
+    #     :return:
+    #     """
+    #     if self.red.connect():
+    #         index_txt = "{}-{}".format(self.platform, self.item['video_id'])
+    #         index_md5 = hashlib.md5(index_txt.encode()).hexdigest()
+    #         if self.red.select(index_md5):
+    #             self.aliyun_log.logging(
+    #                 code="2007",
+    #                 trace_id=self.trace_id,
+    #                 message="该视频 mq 已经发送"
+    #             )
+    #             return False
+    #         else:
+    #             self.red.insert(index_md5, int(time.time()), 43200)
+    #             return True
+    #     else:
+    #         return True
+
+    def process_item(self):
+        """
+        全规则判断,符合规则的数据则return True
+        :return:
+        """
+        # 判断该 mq 是否已经发了
+        # if not self.mq_exists():
+        #     return False
+        if not self.publish_time_flag():
+            # 记录相关日志
+            return False
+        if not self.title_flag():
+            # 记录相关日志
+            return False
+        if not self.repeat_video():
+            # 记录相关日志
+            return False
+        if not self.download_rule_flag():
+            # 记录相关日志
+            return False
+        return True
+

+ 112 - 0
application/pipeline/pipeline_dev.py

@@ -0,0 +1,112 @@
+import re
+import time
+
+
+class PiaoQuanPipelineTest:
+    def __init__(self, platform, mode, rule_dict, env, item, trace_id):
+        self.platform = platform
+        self.mode = mode
+        self.item = item
+        self.rule_dict = rule_dict
+        self.env = env
+        self.trace_id = trace_id
+
+    # 视频的发布时间限制, 属于是规则过滤
+    def publish_time_flag(self):
+        # 判断发布时间
+        publish_time_stamp = self.item["publish_time_stamp"]
+        update_time_stamp = self.item["update_time_stamp"]
+        if self.platform == "gongzhonghao":
+            if (
+                    int(time.time()) - publish_time_stamp
+                    > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+            ) and (
+                    int(time.time()) - update_time_stamp
+                    > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+            ):
+                message = "发布时间超过{}天".format(
+                    int(self.rule_dict.get("period", {}).get("max", 1000))
+                )
+                print(message)
+                return False
+        else:
+            if (
+                    int(time.time()) - publish_time_stamp
+                    > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+            ):
+                message = "发布时间超过{}天".format(
+                    int(self.rule_dict.get("period", {}).get("max", 1000))
+                )
+                print(message)
+                return False
+        return True
+
+    # 视频标题是否满足需求
+    def title_flag(self):
+        title = self.item["video_title"]
+        cleaned_title = re.sub(r"[^\w]", " ", title)
+        # 敏感词
+        # 获取敏感词列表
+        sensitive_words = []
+        if any(word in cleaned_title for word in sensitive_words):
+            message = "标题中包含敏感词"
+            print(message)
+            return False
+        return True
+
+    # 视频基础下载规则
+    def download_rule_flag(self):
+        for key in self.item:
+            if self.rule_dict.get(key):
+                max_value = (
+                    int(self.rule_dict[key]["max"])
+                    if int(self.rule_dict[key]["max"]) > 0
+                    else 999999999999999
+                )
+                if key == "peroid":  # peroid是抓取周期天数
+                    continue
+                else:
+                    flag = int(self.rule_dict[key]["min"]) <= int(self.item[key]) <= max_value
+                    if not flag:
+                        message = "{}: {} <= {} <= {}, {}".format(
+                            key,
+                            self.rule_dict[key]["min"],
+                            self.item[key],
+                            max_value,
+                            flag,
+                        )
+                        print(message)
+                        return flag
+            else:
+                continue
+        return True
+
+    # 按照某个具体平台来去重
+    def repeat_video(self):
+        # sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
+        # out_id = self.item["out_video_id"]
+        # sql = f""" select * from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}"; """
+        # repeat_video = MysqlHelper.get_values(
+        #     log_type=self.mode, crawler=self.platform, env=self.env, sql=sql, action=""
+        # )
+        # if repeat_video:
+        #     message = "重复的视频"
+        #     print(message)
+        #     return False
+        return True
+
+    def process_item(self):
+        if not self.publish_time_flag():
+            # 记录相关日志
+            return False
+        if not self.title_flag():
+            # 记录相关日志
+            return False
+        if not self.repeat_video():
+            # 记录相关日志
+            return False
+        if not self.download_rule_flag():
+            # 记录相关日志
+            return False
+        return True
+

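A minimal usage sketch of PiaoQuanPipelineTest above; the item fields and rule values below are made up for illustration:

    import time
    from application.pipeline.pipeline_dev import PiaoQuanPipelineTest

    item = {
        "publish_time_stamp": int(time.time()),
        "update_time_stamp": int(time.time()),
        "video_title": "示例标题",
        "play_cnt": 1000,
        "duration": 60,
    }
    rule_dict = {"duration": {"min": 30, "max": 1200}, "play_cnt": {"min": 500, "max": 0}}
    pipeline = PiaoQuanPipelineTest(
        platform="benshanzhufu", mode="recommend", rule_dict=rule_dict,
        env="dev", item=item, trace_id="benshanzhufu-test-1",
    )
    print(pipeline.process_item())  # True when every rule passes
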
+ 0 - 0
configs/__init__.py


+ 61 - 0
configs/codes.py

@@ -0,0 +1,61 @@
+# crawler_status/codes.py
+
+# 成功
+SUCCESS = "1000"
+
+# 参数配置错误
+CONFIG_MISSING = "2000"
+PARAM_REQUIRED = "2001"
+UNSUPPORTED_TYPE = "2002"
+URL_JOIN_FAILED = "2003"
+CUSTOM_CLASS_IMPORT_FAILED = "2004"
+CONFIG_LOAD_FAILED = "2005"
+
+# 抓取错误
+FETCH_EXCEPTION = "3000"
+FETCH_EMPTY = "3001"
+HTTP_ERROR = "3002"
+TIMEOUT = "3003"
+INVALID_FORMAT = "3004"
+BLOCKED = "3005"
+REDIRECT_ERROR = "3006"
+
+# 解析处理
+JSONPATH_FAIL = "3100"
+XPATH_FAIL = "3101"
+FIELD_MAP_ERROR = "3102"
+PARSE_EMPTY = "3103"
+FORMAT_INVALID = "3104"
+
+# 清洗转化
+CLEAN_MISMATCH = "3200"
+TRANSFORM_FAIL = "3201"
+MISSING_REQUIRED_FIELD = "3202"
+
+# 数据写入
+DB_WRITE_FAIL = "4000"
+DB_DUPLICATE = "4001"
+DB_CONN_FAIL = "4002"
+FILE_WRITE_FAIL = "4003"
+
+# ETL
+ETL_IMPORT_FAIL = "4100"
+ETL_RUN_FAIL = "4101"
+ETL_UNKNOWN_ERROR = "4102"
+
+# 系统
+UNKNOWN_ERROR = "5000"
+IMPORT_ERROR = "5001"
+DYNAMIC_LOAD_ERROR = "5002"
+FILE_NOT_FOUND = "5003"
+
+# 业务
+DATA_EXISTS = "6000"
+NO_UPDATE = "6001"
+FILTERED = "6002"
+
+# 重试
+RETRY = "7000"
+RETRY_MAX = "7001"
+
+

+ 1 - 0
configs/config.py

@@ -0,0 +1 @@
+base_url="http://8.217.192.46:8889"

+ 52 - 0
configs/messages.py

@@ -0,0 +1,52 @@
+# crawler_status/messages.py
+
+from .codes import *
+
+MESSAGES = {
+    SUCCESS: "成功",
+    CONFIG_MISSING: "配置缺失或无效",
+    PARAM_REQUIRED: "缺少必要参数",
+    UNSUPPORTED_TYPE: "不支持的爬虫类型",
+    URL_JOIN_FAILED: "URL 拼接失败",
+    CUSTOM_CLASS_IMPORT_FAILED: "自定义类加载失败",
+    CONFIG_LOAD_FAILED: "配置文件读取失败",
+
+    FETCH_EXCEPTION: "抓取单条视频失败,请求异常",
+    FETCH_EMPTY: "抓取返回空数据",
+    HTTP_ERROR: "HTTP 状态码异常",
+    TIMEOUT: "请求超时",
+    INVALID_FORMAT: "无效的响应格式",
+    BLOCKED: "被目标站封禁或滑块验证",
+    REDIRECT_ERROR: "请求被重定向异常",
+
+    JSONPATH_FAIL: "JSONPath 提取失败",
+    XPATH_FAIL: "HTML XPath 提取失败",
+    FIELD_MAP_ERROR: "字段映射缺失或类型错误",
+    PARSE_EMPTY: "解析后结果为空",
+    FORMAT_INVALID: "数据格式校验失败",
+
+    CLEAN_MISMATCH: "清洗规则不匹配",
+    TRANSFORM_FAIL: "数据转化失败",
+    MISSING_REQUIRED_FIELD: "字段缺失导致中断",
+
+    DB_WRITE_FAIL: "写入数据库失败",
+    DB_DUPLICATE: "主键冲突或重复数据",
+    DB_CONN_FAIL: "数据库连接失败",
+    FILE_WRITE_FAIL: "写入本地文件失败",
+
+    ETL_IMPORT_FAIL: "ETL 模块导入失败",
+    ETL_RUN_FAIL: "process_video_obj 执行失败",
+    ETL_UNKNOWN_ERROR: "ETL 处理逻辑异常",
+
+    UNKNOWN_ERROR: "未知系统错误",
+    IMPORT_ERROR: "模块导入错误",
+    DYNAMIC_LOAD_ERROR: "动态类加载失败",
+    FILE_NOT_FOUND: "路径错误或文件不存在",
+
+    DATA_EXISTS: "视频内容已存在,跳过",
+    NO_UPDATE: "当前无更新内容",
+    FILTERED: "需人工校验的内容被过滤",
+
+    RETRY: "触发重试机制",
+    RETRY_MAX: "最大重试次数已达,终止任务",
+}

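A short sketch of how the codes and messages above are meant to be paired when logging; the helper name is illustrative:

    from configs import codes
    from configs.messages import MESSAGES

    def status_message(code: str) -> str:
        # Fall back to the generic "unknown system error" text for unmapped codes
        return MESSAGES.get(code, MESSAGES[codes.UNKNOWN_ERROR])

    print(status_message(codes.FETCH_EXCEPTION))  # 抓取单条视频失败,请求异常
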
+ 59 - 0
configs/spiders_config.yaml

@@ -0,0 +1,59 @@
+default:
+  base_url: http://8.217.192.46:8889
+  request_timeout: 30
+  headers:
+    {"Content-Type": "application/json"}
+benshanzhufu:
+  mode: recommend
+  path: /crawler/ben_shan_zhu_fu/recommend
+  method: post
+  request_body:
+    cursor: "1"
+  paging: true
+  max_pages: 5
+  etl_hook: "process_video_obj"
+  response_parse:
+    next_cursor: "$.data.next_cursor"
+    data_path: "$.data.data"
+    fields:
+      video_id: "$.nid"
+      video_title: "$.title"
+      play_cnt: 0
+      publish_time_stamp: "$.update_time"
+      out_user_id: "$.nid"
+      cover_url: "$.video_cover"
+      like_cnt: 0
+      video_url: "$.video_url"
+      out_video_id: "$.nid"
+
+
+zhongqingkandian:
+  mode: recommend
+  path: "/zqkd"
+  paging: true
+  max_pages: 5
+  db_config:
+    table: "zhongqingkandian"
+  etl_hook: "process_video_obj"
+  parse:
+    data_path: "$.data[*]"
+    fields:
+      title: "$.title"
+      vid: "$.id"
+      cover: "$.cover"
+      url: "$.video_url"
+  custom_class: my_crawlers.ZhongqingKandianCrawler
+
+fuqihaoyundao:
+  url: "/fuqi"
+  method: "POST"
+  paging: false
+  retry_times: 2
+  etl_hook: "process_video_obj"
+  parse:
+    data_path: "$.videos[*]"
+    fields:
+      id: "$.id"
+      name: "$.name"
+      mp4: "$.url"
+

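In the fields blocks above, values beginning with "$." are JSONPath expressions while plain values (such as play_cnt: 0) are written through as constants. A sketch of that mapping, mirroring how crawler_worker/universal_crawler.py consumes the config:

    from utils.extractors import safe_extract

    def map_fields(video_data: dict, field_map: dict) -> dict:
        # JSONPath strings are extracted from the payload; anything else is used as a literal value
        return {
            name: safe_extract(video_data, path)
            if isinstance(path, str) and path.startswith("$.") else path
            for name, path in field_map.items()
        }
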
+ 3 - 0
configs/topic_map.yaml

@@ -0,0 +1,3 @@
+topics:
+  - bszf_recommend_prod
+  - zqkd_recommend_prod

+ 0 - 0
crawler_worker/__init__.py


+ 90 - 0
crawler_worker/rabbitmq_consumer.py

@@ -0,0 +1,90 @@
+import pika
+import asyncio
+import json
+import yaml
+from .universal_crawler import AsyncCrawler
+from .utils.log_config import setup_logger
+
+
+class RabbitMQConsumer:
+    def __init__(self, config_path: str):
+        self.config_path = config_path
+        self.aliyun_log = setup_logger("rabbitmq_consumer", "system")
+        self.consumer_tag = None
+
+    def connect(self):
+        """连接到RabbitMQ"""
+        try:
+            with open('config/rabbitmq_config.yaml', 'r', encoding='utf-8') as f:
+                rabbit_config = yaml.safe_load(f)
+
+            self.connection = pika.BlockingConnection(
+                pika.ConnectionParameters(
+                    host=rabbit_config.get('host', 'localhost'),
+                    port=rabbit_config.get('port', 5672),
+                    credentials=pika.PlainCredentials(
+                        rabbit_config.get('username', 'guest'),
+                        rabbit_config.get('password', 'guest')
+                    )
+                )
+            )
+            self.channel = self.connection.channel()
+            self.aliyun_log.info("成功连接到RabbitMQ")
+            return True
+        except Exception as e:
+            self.aliyun_log.error(f"连接RabbitMQ失败: {str(e)}")
+            return False
+
+    async def process_message(self, ch, method, properties, body):
+        """处理消息"""
+        task = json.loads(body)
+        self.aliyun_log.info(f"收到任务: {task.get('task_id', '未知ID')}")
+
+        platform = task.get('platform', 'unknown_platform')
+        mode = task.get('mode', 'recommend')
+
+        crawler = AsyncCrawler(platform, mode, self.config_path)
+        try:
+            await crawler.run()
+            ch.basic_ack(delivery_tag=method.delivery_tag)
+            self.aliyun_log.info(f"任务完成: {task.get('task_id', '未知ID')}")
+        except Exception as e:
+            self.aliyun_log.error(f"处理任务异常: {str(e)}")
+            # 重新排队
+            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
+
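+    def setup_queue(self):
+        """声明消费队列并返回队列名。
+        最小示意实现:假设队列名配置在 config/rabbitmq_config.yaml 的 queue 字段,缺省为 crawler_tasks。
+        """
+        try:
+            with open('config/rabbitmq_config.yaml', 'r', encoding='utf-8') as f:
+                rabbit_config = yaml.safe_load(f)
+            queue_name = rabbit_config.get('queue', 'crawler_tasks')
+            self.channel.queue_declare(queue=queue_name, durable=True)
+            return queue_name
+        except Exception as e:
+            self.aliyun_log.error(f"声明队列失败: {str(e)}")
+            return None
+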
+    def start_consuming(self):
+        """开始消费消息"""
+        if not self.connect():
+            return
+
+        queue_name = self.setup_queue()
+        if not queue_name:
+            return
+
+        try:
+            self.channel.basic_consume(
+                queue=queue_name,
+                on_message_callback=self._sync_process_message,
+                auto_ack=False
+            )
+            self.aliyun_log.info(f"开始消费队列: {queue_name}")
+            self.channel.start_consuming()
+        except KeyboardInterrupt:
+            self.channel.stop_consuming()
+        except Exception as e:
+            self.aliyun_log.error(f"消费消息失败: {str(e)}")
+        finally:
+            self.connection.close()
+
+    def _sync_process_message(self, ch, method, properties, body):
+        """同步包装异步处理函数"""
+        asyncio.run(self.process_message(ch, method, properties, body))
+
+
+def main():
+    consumer = RabbitMQConsumer("config/platform_config.yaml")
+    consumer.start_consuming()
+
+
+if __name__ == "__main__":
+    main()

+ 205 - 0
crawler_worker/universal_crawler.py

@@ -0,0 +1,205 @@
+import os
+import sys
+import json
+import random
+import time
+import uuid
+import yaml
+import requests
+
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
+from utils.extractors import safe_extract,extract_multiple
+
+# 添加公共模块路径
+sys.path.append(os.getcwd())
+print(os.getcwd())
+
+from application.items import VideoItem
+from application.pipeline import PiaoQuanPipeline
+from application.common.messageQueue import MQ
+from application.common.log import AliyunLogger
+# from application.common.mysql import MysqlHelper
+from configs.messages import MESSAGES
+from configs import codes
+from utils.config_loader import ConfigLoader
+from application.common.log import Local
+from configs.config import base_url
+
+
+class UniversalCrawler:
+    """通用爬虫类,通过YAML配置驱动不同平台的爬取逻辑"""
+
+    def __init__(self, platform: str, mode: str, rule_dict: Dict, user_list: List, env: str = "prod"):
+        """
+        初始化爬虫
+        :param platform: 平台名称(对应YAML文件名)
+        :param env: 运行环境
+        """
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_list = user_list
+        self.env = env
+        self.config_path = "/Users/zhangliang/Documents/piaoquan/AutoScraperX/configs/spiders_config.yaml"
+        self.config = ConfigLoader().get_platform_config(self.platform)
+        self.aliyun_log = AliyunLogger(platform=platform, mode=self.config["mode"])
+        self.mq = MQ(topic_name=f"topic_crawler_etl_{env}")
+        # self.mysql = MysqlHelper(mode=self.config["mode"], platform=platform)
+        self.logger = Local.init_logger(platform=self.platform, mode=self.mode, log_level="INFO", log_to_console=True)
+        self.download_cnt = 0
+        self.limit_flag = False
+        self.base_api = base_url
+
+    @retry(
+        stop=stop_after_attempt(3),  # 最多重试 3 次
+        wait=wait_fixed(2),  # 每次重试间隔 2 秒
+        retry=retry_if_exception_type((requests.RequestException, ValueError)),
+        retry_error_callback=lambda retry_state: None,  # 重试耗尽后返回 None,而不是抛出 RetryError
+    )
+    def _send_request(self, method: str, url: str, headers, payload, timeout=30) -> Optional[Dict]:
+        """发送API请求,失败自动重试最多3次"""
+        try:
+            response = requests.request(
+                method=method,
+                url=url,
+                headers=headers,
+                json=payload,
+                timeout=timeout
+            )
+            response.raise_for_status()
+            resp = response.json()
+            if resp.get("code") == 0:
+                return resp
+            raise ValueError(f"接口响应非0:{resp}")
+        except Exception as e:
+            # 记录失败并重新抛出,交由 tenacity 判断是否重试
+            self.aliyun_log.logging(
+                code="3000",
+                message=f"请求失败: {url}",
+                data={"error": str(e)}
+            )
+            raise
+
+    def _process_video(self, video_data: Dict) -> bool:
+        """处理单个视频数据"""
+        # 从配置中获取字段映射
+        field_map = self.config["response_parse"]["fields"]
+
+        # 创建视频项
+        item = VideoItem()
+        for field_name, path in field_map.items():
+            if isinstance(path, str) and path.startswith("$."):
+
+                match = safe_extract(video_data,path)
+                item.add_video_info(field_name, match)
+            else:
+                # 如果是固定值(int、str等),直接使用
+                item.add_video_info(field_name,path)
+
+        # 添加固定字段
+        item.add_video_info("platform", self.platform)
+        item.add_video_info("strategy", self.config["mode"])
+        item.add_video_info("session", f"{self.platform}-{int(time.time())}")
+
+        # 随机选择一个用户
+        our_user = random.choice(self.user_list)
+        item.add_video_info("user_id", our_user["uid"])
+        item.add_video_info("user_name", our_user["nick_name"])
+
+        print(item)
+
+        # 处理管道
+        trace_id = f"{self.platform}-{uuid.uuid4()}"
+        pipeline = PiaoQuanPipeline(
+            platform=self.platform,
+            mode=self.config["mode"],
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=item.produce_item(),
+            trace_id=trace_id,
+        )
+
+        if pipeline.process_item():
+            self.download_cnt += 1
+            self.mq.send_msg(item.produce_item())
+            self.aliyun_log.logging(
+                code="1002",
+                message="成功发送至ETL",
+                data=item.produce_item()
+            )
+
+            # 检查下载限制
+            min_limit = self.config.get("download_limit", {}).get("min", 200)
+            if self.download_cnt >= min_limit:
+                self.limit_flag = True
+                self.aliyun_log.logging(
+                    code="2000",
+                    message=f"达到下载限制: {min_limit}",
+                )
+            return True
+        return False
+
+
+    # --------------------- 自定义处理函数 ---------------------
+    def _func_current_timestamp(self, _) -> int:
+        """获取当前时间戳"""
+        return int(time.time())
+
+    def _func_formatted_time(self, _) -> str:
+        """获取格式化时间"""
+        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    def _func_random_delay(self, _) -> None:
+        """随机延迟"""
+        min_delay = self.config.get("delay", {}).get("min", 3)
+        max_delay = self.config.get("delay", {}).get("max", 8)
+        time.sleep(random.randint(min_delay, max_delay))
+        return None
+
+    def run(self):
+        """执行爬取任务"""
+        self.logger.info(f"开始执行爬虫{self.platform}")
+
+        request_body = dict(self.config.get("request_body", {}))
+        max_pages = self.config.get("max_pages", 1) if self.config.get("paging") else 1
+        page = 0
+
+        while not self.limit_flag and page < max_pages:
+            # 获取当前页列表数据
+            response_data = self._send_request(
+                self.config["method"].upper(),
+                self.config["url"],
+                self.config.get("headers", {}),
+                request_body
+            )
+            if not response_data:
+                return
+
+            # 按配置的 JSONPath 提取视频列表
+            video_list = safe_extract(response_data, self.config["response_parse"]["data_path"])
+            self.logger.info(f"获取到的视频列表:{json.dumps(video_list, ensure_ascii=False)}")
+            if not video_list:
+                return
+
+            for video_data in video_list:
+                self.logger.info(f"视频对象{video_data}")
+                if self.limit_flag:
+                    break
+                self._process_video(video_data)
+
+                # 执行额外操作(如曝光上报);假设 endpoint 为完整 URL,默认使用 POST
+                for action in self.config.get("post_actions", []):
+                    if action["trigger"] == "after_video_processed":
+                        self._send_request(
+                            action.get("method", "POST").upper(),
+                            action["endpoint"],
+                            self.config.get("headers", {}),
+                            action.get("payload", {}),
+                        )
+
+            # 翻页:将 next_cursor 写回请求体的 cursor 字段(假设接口按 cursor 翻页)
+            next_cursor_path = self.config["response_parse"].get("next_cursor")
+            next_cursor = safe_extract(response_data, next_cursor_path) if next_cursor_path else None
+            if not next_cursor:
+                return
+            request_body["cursor"] = str(next_cursor)
+            page += 1
+
+
+if __name__ == '__main__':
+    cr = UniversalCrawler("benshanzhufu", "recommend",
+                          rule_dict={'videos_cnt': {'min': 500, 'max': 0}, 'duration': {'min': 30, 'max': 1200}},
+                          user_list=[{"uid": 20631262, "link": "recommend_2060", "nick_name": "人老心不老"}])
+
+    cr.run()

+ 76 - 0
main.py

@@ -0,0 +1,76 @@
+import threading
+import traceback
+import json
+import time
+import uuid
+
+from application.common import AliyunLogger, get_consumer, ack_message
+from application.common.log import Local
+from crawler_worker.universal_crawler import UniversalCrawler
+from application.config import TopicGroup
+from application.service.user_service import get_user_list
+from application.service.rule_service import get_rule_dict
+
+def generate_trace_id():
+    return f"{uuid.uuid4().hex}{int(time.time() * 1000)}"
+
+def handle_message(topic: str):
+    consumer = get_consumer(topic)
+    logger = AliyunLogger(platform=topic, mode="unknown")
+
+    while True:
+        try:
+            messages = consumer.consume_message(wait_seconds=10, batch_size=1)
+            if not messages:
+                continue
+
+            for message in messages:
+                trace_id = generate_trace_id()
+                body = message.message_body
+
+                try:
+                    payload = json.loads(body)
+                    platform = payload["platform"]
+                    mode = payload.get("mode", "recommend")
+                    logger = AliyunLogger(platform=platform, mode=mode)
+                    Local.logger(platform, mode).info(f"[trace_id={trace_id}] 收到任务: {body}")
+
+                    # 加载 user_list 与 rule_dict
+                    user_list = get_user_list(platform, mode)
+                    rule_dict = get_rule_dict(platform, mode)
+
+                    # 同步执行 UniversalCrawler
+                    crawler = UniversalCrawler(platform, mode, rule_dict, user_list)
+                    crawler.run()
+
+                    # 执行成功后 ack
+                    ack_message(mode, platform, message, consumer, trace_id=trace_id)
+                    logger.logging(code="1000", message="任务成功完成并确认消息", trace_id=trace_id)
+
+                except Exception as e:
+                    logger.logging(
+                        code="9001",
+                        message=f"处理消息失败(未确认 ack): {e}\n{traceback.format_exc()}",
+                        trace_id=trace_id,
+                        data=body,
+                    )
+                    # 不 ack,等待下次重试
+        except Exception as err:
+            logger.logging(code="9002", message=f"消费失败: {err}\n{traceback.format_exc()}")
+        time.sleep(2)
+
+def main():
+    topic_list = TopicGroup().topics
+    print(f"监听 Topics:{topic_list}")
+
+    threads = []
+    for topic in topic_list:
+        t = threading.Thread(target=handle_message, args=(topic,))
+        t.start()
+        threads.append(t)
+
+    for t in threads:
+        t.join()
+
+if __name__ == '__main__':
+    main()

+ 0 - 0
pipelines/__init__.py


+ 0 - 0
scheduler/__init__.py


+ 45 - 0
scheduler/scheduler_main.py

@@ -0,0 +1,45 @@
+# scheduler_main.py - 爬虫调度主程序
+import asyncio
+import json
+import time
+import traceback
+import sys
+import os
+from crawler_controller import CrawlerController
+from application.common.log import Local
+from application.common import AliyunLogger
+from crawler_worker.universal_crawler import AsyncCrawler
+
+
+async def main():
+    """主函数"""
+    # 设置日志
+    logger = AliyunLogger(platform="system", mode="manager")
+
+    try:
+        # 从环境变量获取配置
+        config_topic = os.getenv("CONFIG_TOPIC", "crawler_config")
+        config_group = os.getenv("CONFIG_GROUP", "crawler_config_group")
+
+        # 创建爬虫控制器
+        # platform / mode 从环境变量读取(变量名与默认值为示意)
+        platform = os.getenv("CRAWLER_PLATFORM", "benshanzhufu")
+        mode = os.getenv("CRAWLER_MODE", "recommend")
+        controller = AsyncCrawler(
+            platform=platform,
+            mode=mode,
+        )
+        # 启动控制器
+        await controller.run()
+
+        # 保持主线程运行
+        while True:
+            await asyncio.sleep(60)
+
+    except Exception as e:
+        tb = traceback.format_exc()
+        message = f"主程序发生错误: {e}\n{tb}"
+        logger.logging(code="1006", message=message)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # 运行主事件循环
+    asyncio.run(main())

+ 0 - 0
utils/__init__.py


+ 37 - 0
utils/config_loader.py

@@ -0,0 +1,37 @@
+import yaml
+import os
+from urllib.parse import urljoin
+from utils.project_paths import config_spiders_path
+
+
+class ConfigLoader:
+    def __init__(self, config_path=config_spiders_path):
+        if not os.path.exists(config_path):
+            raise FileNotFoundError(f"[配置错误] 找不到配置文件: {config_path}")
+        self.config_path = config_path
+        self.config = self._load_yaml()
+
+    def _load_yaml(self):
+        with open(self.config_path, "r", encoding="utf-8") as f:
+            return yaml.safe_load(f)
+
+    def get_platform_config(self, platform: str) -> dict:
+        """获取平台配置,并拼接完整 URL"""
+        if platform not in self.config:
+            raise ValueError(f"[配置错误] 未找到平台配置: {platform}")
+
+        platform_config = self.config.get(platform, {})
+        base_config = self.config.get("default", {})
+
+        # 合并配置:平台配置覆盖默认配置
+        merged = {**base_config, **platform_config}
+
+        # 自动拼接完整 url(优先用完整 url)
+        if "url" not in merged and "base_url" in merged and "path" in merged:
+            merged["url"] = urljoin(merged["base_url"], merged["path"])
+
+        return merged
+
+if __name__ == '__main__':
+    config = ConfigLoader().get_platform_config("benshanzhufu")
+    print(config)

+ 29 - 0
utils/extractors.py

@@ -0,0 +1,29 @@
+from jsonpath_ng import parse
+
+def safe_extract(json_obj, path, default=None):
+    """
+    安全提取单个字段值,返回匹配到的第一个,否则返回默认值。
+
+    :param json_obj: 输入的 JSON 对象
+    :param path: JSONPath 表达式
+    :param default: 提取失败时返回的默认值
+    :return: 提取结果或默认值
+    """
+    try:
+        jsonpath_expr = parse(path)
+        match = jsonpath_expr.find(json_obj)
+        if match:
+            return match[0].value
+    except Exception as e:
+        print(f"[extractor] Error extracting {path}: {e}")
+    return default
+
+def extract_multiple(json_obj, fields: dict) -> dict:
+    """
+    根据字段配置提取多个字段。
+
+    :param json_obj: 输入的 JSON 对象
+    :param fields: 字段配置,如 {"title": "$.title", "id": "$.id"}
+    :return: 字段名 -> 提取值的字典
+    """
+    return {key: safe_extract(json_obj, path) for key, path in fields.items()}

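A quick usage sketch of the two helpers with a made-up payload shaped like the benshanzhufu response:

    from utils.extractors import safe_extract, extract_multiple

    payload = {"data": {"next_cursor": "2", "data": [{"nid": 101, "title": "示例视频"}]}}
    print(safe_extract(payload, "$.data.next_cursor"))  # "2"
    first = safe_extract(payload, "$.data.data[0]", default={})
    print(extract_multiple(first, {"out_video_id": "$.nid", "video_title": "$.title"}))
    # {'out_video_id': 101, 'video_title': '示例视频'}
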
+ 10 - 0
utils/path_utils.py

@@ -0,0 +1,10 @@
+import os
+
+def get_project_path() -> str:
+    """
+    获取 AutoScraperX 项目根路径
+    """
+    return os.path.dirname(os.path.abspath(__file__)).split("AutoScraperX")[0] + "AutoScraperX"
+
+if __name__ == '__main__':
+    print( get_project_path())

+ 31 - 0
utils/project_paths.py

@@ -0,0 +1,31 @@
+import os
+from utils.path_utils import get_project_path
+
+# 项目根目录
+project_root = get_project_path()
+
+# 配置文件路径
+config_dir = os.path.join(project_root, "configs")
+config_spiders_path = os.path.join(config_dir, "spiders_config.yaml")
+
+# 日志路径(根路径 + log_store)
+log_dir = os.path.join(project_root, "log_store")
+
+# 模型路径(如有)
+model_dir = os.path.join(project_root, "models")
+
+# 临时文件、缓存目录
+tmp_dir = os.path.join(project_root, "tmp")
+
+# 其他路径可按需添加
+# db_config_path = os.path.join(config_dir, "db.yaml")
+
+# 导出路径变量
+__all__ = [
+    "project_root",
+    "config_dir",
+    "config_spiders_path",
+    "log_dir",
+    "model_dir",
+    "tmp_dir",
+]