wangkun · 2 years ago · commit 88b75775bd

BIN
.DS_Store


+ 13 - 2
README.MD

@@ -18,9 +18,20 @@ ${nohup_dir}:       nohup log path, e.g.: ./youtube/nohup.log
 
 ### Example run commands for crawlers already in production
 ```
+xigua (Xigua Video) run command: 
+On Aliyun server 102:
+sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/nohup.log
+On the local machine:
+sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="out" --env="dev" --machine="local" xigua/nohup.log
+Kill-process command:
+ps aux | grep run_xigua | grep -v grep | awk '{print $2}' | xargs kill -9
+
 youtube follow-list run command: 
 sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="hk" --env="prod" --machine="aliyun_hk" youtube/nohup.log
-youtube定向榜杀进程命令: 
+youtube kill-process command: 
 ps aux | grep run_youtube | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_youtube | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9
+
+weixinzhishu (WeChat Index) kill-process command:
+ps aux | grep run_weixinzhishu | grep -v grep | awk '{print $2}' | xargs kill -9
+
 ```
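
All of the run/kill commands above follow the same wrapper pattern. As a quick reference, a generalized sketch — the angle-bracket placeholders are illustrative, not actual paths, and the flag values are the ones that appear in the examples above:
```
# launch a crawler via the main.sh wrapper
sh ./main/main.sh <path/to/run_script.py> --log_type="follow" --crawler="<crawler>" --strategy="定向爬虫策略" --oss_endpoint="inner|out|hk" --env="prod|dev" --machine="aliyun|local|aliyun_hk" <crawler>/nohup.log

# stop it again by killing the matching process
ps aux | grep <run_script_name> | grep -v grep | awk '{print $2}' | xargs kill -9
```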

+ 1 - 0
common/common.py

@@ -12,6 +12,7 @@ import time
 import requests
 import ffmpeg
 import urllib3
+import subprocess
 proxies = {"http": None, "https": None}
 
 

+ 0 - 16
common/demo.py

@@ -1,16 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/10
-import time
-
-
-class Demo:
-    @classmethod
-    def test_time(cls):
-        time_str = '2023-02-07'
-        time_stamp = int(time.mktime(time.strptime(time_str, "%Y-%m-%d")))
-        print(time_stamp)
-
-
-if __name__ == "__main__":
-    Demo.test_time()

+ 312 - 145
common/feishu.py

@@ -69,12 +69,6 @@ class Feishu:
     # 微信指数_搜索词
     weixinzhishu_search_word = 'https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?'
 
-    # 手机号
-    wangkun = "13426262515"
-    gaonannan = "18501180073"
-    xinxin = "15546206651"
-    huxinxue = "18832292015"
-
     # 飞书路径token
     @classmethod
     def spreadsheettoken(cls, crawler):
@@ -158,18 +152,18 @@ class Feishu:
         获取表格元数据
         :return:
         """
-        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                           + cls.spreadsheettoken(crawler) + "/metainfo"
-
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        params = {
-            "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
-            "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
-        }
         try:
+            get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/metainfo"
+
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+                "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+            }
             urllib3.disable_warnings()
             r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
             response = json.loads(r.content.decode("utf8"))
@@ -187,29 +181,29 @@ class Feishu:
         :param sheetid: 哪张表
         :return: 所有数据
         """
-        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        params = {
-            # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
-            "ranges": sheetid,
-
-            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
-            # valueRenderOption=FormattedValue 计算并格式化单元格;
-            # valueRenderOption=Formula单元格中含有公式时返回公式本身;
-            # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
-            "valueRenderOption": "ToString",
-
-            # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
-            "dateTimeRenderOption": "",
-
-            # 返回的用户id类型,可选open_id,union_id
-            "user_id_type": "open_id"
-        }
         try:
+            get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                   + cls.spreadsheettoken(crawler) + "/values_batch_get"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
+                "ranges": sheetid,
+
+                # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+                # valueRenderOption=FormattedValue 计算并格式化单元格;
+                # valueRenderOption=Formula单元格中含有公式时返回公式本身;
+                # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
+                "valueRenderOption": "ToString",
+
+                # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+                "dateTimeRenderOption": "",
+
+                # 返回的用户id类型,可选open_id,union_id
+                "user_id_type": "open_id"
+            }
             urllib3.disable_warnings()
             r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
             # print(r.text)
@@ -231,22 +225,23 @@ class Feishu:
         :param startindex:开始位置
         :param endindex:结束位置
         """
-        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        body = {
-            "dimension": {
-                "sheetId": sheetid,
-                "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
-                "startIndex": startindex,  # 开始的位置
-                "endIndex": endindex  # 结束的位置
-            },
-            "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
-        }
         try:
+            insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                 + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                    "startIndex": startindex,  # 开始的位置
+                    "endIndex": endindex  # 结束的位置
+                },
+                "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+            }
+
             urllib3.disable_warnings()
             r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
             Common.logger(log_type, crawler).info("插入行或列:{}", r.json()["msg"])
@@ -264,22 +259,21 @@ class Feishu:
         :param ranges:单元格范围
         :param values:写入的具体数据,list
         """
-        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        body = {
-            "valueRanges": [
-                {
-                    "range": sheetid + "!" + ranges,
-                    "values": values
-                },
-            ],
-        }
-
         try:
+            update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                + cls.spreadsheettoken(crawler) + "/values_batch_update"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "valueRanges": [
+                    {
+                        "range": sheetid + "!" + ranges,
+                        "values": values
+                    },
+                ],
+            }
             urllib3.disable_warnings()
             r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
             Common.logger(log_type, crawler).info("写入数据:{}", r.json()["msg"])
@@ -296,19 +290,18 @@ class Feishu:
         :param sheetid:哪张工作表
         :param ranges:需要合并的单元格范围
         """
-        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                          + cls.spreadsheettoken(crawler) + "/merge_cells"
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-
-        body = {
-            "range": sheetid + "!" + ranges,
-            "mergeType": "MERGE_ROWS"
-        }
-
         try:
+            merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/merge_cells"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+
+            body = {
+                "range": sheetid + "!" + ranges,
+                "mergeType": "MERGE_ROWS"
+            }
             urllib3.disable_warnings()
             r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
             Common.logger(log_type, crawler).info("合并单元格:{}", r.json()["msg"])
@@ -326,26 +319,26 @@ class Feishu:
         :param cell: 哪个单元格
         :return: 单元格内容
         """
-        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        params = {
-            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
-            # valueRenderOption=FormattedValue 计算并格式化单元格;
-            # valueRenderOption=Formula 单元格中含有公式时返回公式本身;
-            # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
-            "valueRenderOption": "FormattedValue",
-
-            # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
-            "dateTimeRenderOption": "",
-
-            # 返回的用户id类型,可选open_id,union_id
-            "user_id_type": "open_id"
-        }
         try:
+            get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            params = {
+                # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+                # valueRenderOption=FormattedValue 计算并格式化单元格;
+                # valueRenderOption=Formula 单元格中含有公式时返回公式本身;
+                # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
+                "valueRenderOption": "FormattedValue",
+
+                # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+                "dateTimeRenderOption": "",
+
+                # 返回的用户id类型,可选open_id,union_id
+                "user_id_type": "open_id"
+            }
             urllib3.disable_warnings()
             r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
             # print(r.text)
@@ -382,21 +375,21 @@ class Feishu:
         :param endindex:结束的位置
         :return:
         """
-        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
-                              + cls.spreadsheettoken(crawler) + "/dimension_range"
-        headers = {
-            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        body = {
-            "dimension": {
-                "sheetId": sheetid,
-                "majorDimension": major_dimension,
-                "startIndex": startindex,
-                "endIndex": endindex
-            }
-        }
         try:
+            dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                                  + cls.spreadsheettoken(crawler) + "/dimension_range"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            body = {
+                "dimension": {
+                    "sheetId": sheetid,
+                    "majorDimension": major_dimension,
+                    "startIndex": startindex,
+                    "endIndex": endindex
+                }
+            }
             urllib3.disable_warnings()
             r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
             Common.logger(log_type, crawler).info("删除视频数据:{}", r.json()["msg"])
@@ -412,32 +405,219 @@ class Feishu:
                 "Authorization": "Bearer " + cls.get_token(log_type, crawler),
                 "Content-Type": "application/json; charset=utf-8"
             }
+            # 手机号
+            wangkun = "13426262515"
+            gaonannan = "18501180073"
+            xinxin = "15546206651"
+            huxinxue = "18832292015"
+            wuchaoyue = "15712941385"
+            lijinchao = '18524120540'
+
             if username == "wangkun":
-                username = cls.wangkun
+                username = wangkun
             elif username == "gaonannan":
-                username = cls.gaonannan
+                username = gaonannan
             elif username == "xinxin":
-                username = cls.xinxin
+                username = xinxin
             elif username == "huxinxue":
-                username = cls.huxinxue
+                username = huxinxue
+            elif username == "wuchaoyue":
+                username = wuchaoyue
+            elif username == "lijinchao":
+                username = lijinchao
+
             data = {"mobiles": [username]}
             urllib3.disable_warnings()
             r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
             open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
-            Common.logger(log_type, crawler).info("{}:{}", username, open_id)
+            Common.logger(log_type, crawler).info(f"{username}:{open_id}")
             # print(f"{username}:{open_id}")
             return open_id
         except Exception as e:
-            Common.logger(log_type, crawler).error("get_userid异常:{}", e)
+            Common.logger(log_type, crawler).error(f"get_userid异常:{e}\n")
 
     # 飞书机器人
     @classmethod
-    def bot(cls, log_type, crawler, content):
+    def bot(cls, log_type, crawler, text):
         try:
             url = "https://open.feishu.cn/open-apis/bot/v2/hook/96989577-50e7-4653-9ec2-308fe3f2c5fe"
-            headers = {
-                'Content-Type': 'application/json'
-            }
+            headers = {'Content-Type': 'application/json'}
+            if crawler == "kanyikan":
+                content = "看一看爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "xiaoniangao_hour":
+                content = "小年糕_小时级_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+            elif crawler == "xiaoniangao_person":
+                content = "小年糕_用户主页_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=Wu0CeL"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+            elif crawler == "xiaoniangao_play":
+                content = "小年糕_播放量_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=c85k1C"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == 'xigua':
+                content = '西瓜视频_用户主页_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=e075e9'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == 'xigua_little_video':
+                content = '西瓜视频_小视频_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=hDSDnv'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == 'zhihu_hot':
+                content = '知乎_热门_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=8871e3'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+            elif crawler == 'zhihu_follow':
+                content = '知乎_定向_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=4MGuux'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == 'haokan_hot':
+                content = '好看_热榜_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=5pWipX'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == 'haokan_channel':
+                content = '好看_频道_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=7f05d8'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == 'haokan_follow':
+                content = '好看_定向_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd?sheet=kVaSjf'
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == "music_album":
+                content = "音乐相册爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "ssyy":
+                content = "胜胜影音爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "ggdc":
+                content = "刚刚都传爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "bszf":
+                content = "本山祝福爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "jxxf":
+                content = "吉祥幸福爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "zmyx":
+                content = "众妙音信爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "zhufumao":
+                content = "祝福猫视频爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "gaonannan")) + "></at>\n"
+
+            elif crawler == "kuaishou_follow":
+                content = "快手_用户主页_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=fYdA8F"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+            elif crawler == "kuaishou_recommend":
+                content = "快手_推荐榜_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=3cd128"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "ssnnyfq":
+                content = "岁岁年年迎福气_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?sheet=290bae"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "kdjsfq":
+                content = "看到就是福气_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb?sheet=ad3b6d"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "gzh":
+                content = "公众号爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "huxinxue")) + "></at>\n"
+
+            elif crawler == "gongzhonghao_xinxin":
+                content = "公众号_信欣_爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "weiqun":
+                content = "微群爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "weishi":
+                content = "微视爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "xinxin")) + "></at>\n"
+
+            elif crawler == "shipinhao_recommend":
+                content = "视频号_推荐_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?sheet=c77cf9"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == "shipinhao_follow":
+                content = "视频号_定向_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?sheet=KsVtLe"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+            elif crawler == "youtube":
+                content = "youtube_定向_已下载表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?sheet=GVxlYk"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == "zongjiao":
+                content = "宗教公众号爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, crawler, "huxinxue")) + "></at>\n"
+
+            else:
+                content = "小年糕爬虫表"
+                sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n"
+
             data = json.dumps({
                 "msg_type": "interactive",
                 "card": {
@@ -448,38 +628,25 @@ class Feishu:
                     "elements": [{
                         "tag": "div",
                         "text": {
-                            "content": "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n" + content,
+                            "content": users + text,
                             "tag": "lark_md"
                         }
                     }, {
                         "actions": [{
                             "tag": "button",
                             "text": {
-                                "content": "快手爬虫表",
+                                "content": content,
                                 "tag": "lark_md"
                             },
-                            "url": "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf",
+                            "url": sheet_url,
                             "type": "default",
                             "value": {}
-                        },
-                            {
-                                "tag": "button",
-                                "text": {
-                                    "content": "快手Jenkins",
-                                    "tag": "lark_md"
-                                },
-                                "url": "https://jenkins-on.yishihui.com/view/%E7%88%AC%E8%99%AB-Spider/job/%E5%BF%"
-                                       "AB%E6%89%8B%E5%B0%8F%E7%A8%8B%E5%BA%8F-%E8%A7%86%E9%A2%91%E7%88%AC%E5%8F%96/",
-                                "type": "default",
-                                "value": {}
-                            }
-
-                        ],
+                        }],
                         "tag": "action"
                     }],
                     "header": {
                         "title": {
-                            "content": "📣有新的报警,请注意查处",
+                            "content": "📣您有新的报警,请注意查收",
                             "tag": "plain_text"
                         }
                     }
@@ -487,10 +654,10 @@ class Feishu:
             })
             urllib3.disable_warnings()
             r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
-            Common.logger(log_type, crawler).info("触发机器人消息:{}, {}", r, r.json()["StatusMessage"])
+            Common.logger(log_type, crawler).info(f'触发机器人消息:{r}, {r.json()["StatusMessage"]}')
         except Exception as e:
-            Common.logger(log_type, crawler).error("bot异常:{}", e)
+            Common.logger(log_type, crawler).error(f"bot异常:{e}\n")
 
 
 if __name__ == "__main__":
-    pass
+    Feishu.bot('follow', 'xigua', '测试一下,请忽略 ~')

BIN
kanyikan/.DS_Store


+ 3 - 3
weixinzhishu/weixinzhishu_main/get_weixinzhishu.py

@@ -194,8 +194,8 @@ class Weixinzhishu:
         wechat_key = cls.get_wechat_key(log_type, crawler)
         search_key = wechat_key[0]
         openid = wechat_key[-1]
-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
+        end_ymd = (date.today() + timedelta(days=-5)).strftime("%Y%m%d")
+        start_ymd = (date.today() + timedelta(days=-9)).strftime("%Y%m%d")
         url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
         payload = json.dumps({
             "openid": openid,
@@ -240,6 +240,6 @@ class Weixinzhishu:
 
 
 if __name__ == "__main__":
-    Weixinzhishu.get_score_test('weixin', 'weixinzhishu', 1 , "社保")
+    Weixinzhishu.get_score_test('weixin', 'weixinzhishu', 1 , "俞仁波")
 
     pass

BIN
xigua/.DS_Store


BIN
xigua/logs/.DS_Store


BIN
xigua/videos/.DS_Store


+ 119 - 0
xigua/xigua_follow/insert_videos.py

@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/23
+import json
+import os
+import sys
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.db import MysqlHelper
+from common.feishu import Feishu
+
+
+class Insert:
+    @classmethod
+    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
+        xigua_sheetid_list = ["QOWqMo", "3Ul6wZ", "e075e9"]
+        for sheetid in xigua_sheetid_list:
+            xigua_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            for i in range(1, len(xigua_sheet)):
+            # for i in range(1, 3):
+                if xigua_sheet[i][5] is None:
+                    continue
+                video_id = xigua_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
+                if video_id == "None":
+                    continue
+                video_id = int(video_id)
+                user_id = 0
+                out_user_id = str(xigua_sheet[i][19])
+                platform = "西瓜视频"
+                strategy = "定向爬虫策略"
+                out_video_id = str(xigua_sheet[i][8])
+                video_title = str(xigua_sheet[i][7])
+                cover_url = str(xigua_sheet[i][21])
+                video_url = str(xigua_sheet[i][22])
+                duration = int(xigua_sheet[i][15])
+                publish_time = str(xigua_sheet[i][17].replace("/", "-"))
+                play_cnt = int(xigua_sheet[i][11])
+                like_cnt = int(xigua_sheet[i][13])
+                share_cnt = int(xigua_sheet[i][14])
+                # collection_cnt = 0
+                comment_cnt = int(xigua_sheet[i][12])
+                crawler_rule = json.dumps({"play_cnt": 0, "comment_cnt": 0, "like_cnt": 0, "duration": 60, "publish_time": 10, "video_width": 720, "video_height": 720})
+                width = int(xigua_sheet[i][16].split("*")[0])
+                height = int(xigua_sheet[i][16].split("*")[1])
+
+                # print(f"video_id:{video_id}, type:{type(video_id)}")
+                # print(f"user_id:{user_id}, type:{type(user_id)}")
+                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
+                # print(f"platform:{platform}, type:{type(platform)}")
+                # print(f"strategy:{strategy}, type:{type(strategy)}")
+                # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
+                # print(f"video_title:{video_title}, type:{type(video_title)}")
+                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
+                # print(f"video_url:{video_url}, type:{type(video_url)}")
+                # print(f"duration:{duration}, type:{type(duration)}")
+                # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
+                # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
+                # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
+                # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
+                # print(f"collection_cnt:{collection_cnt}, type:{type(collection_cnt)}")
+                # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
+                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
+                # print(f"width:{width}, type:{type(width)}")
+                # print(f"height:{height}, type:{type(height)}\n")
+
+                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
+                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
+                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
+                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
+
+                if repeat_video is not None and len(repeat_video) != 0:
+                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
+                else:
+                    # 视频信息保存数据库
+                    insert_sql = f""" insert into crawler_video(video_id,
+                                    user_id,
+                                    out_user_id,
+                                    platform,
+                                    strategy,
+                                    out_video_id,
+                                    video_title,
+                                    cover_url,
+                                    video_url,
+                                    duration,
+                                    publish_time,
+                                    play_cnt,
+                                    like_cnt,
+                                    share_cnt,
+                                    comment_cnt,
+                                    crawler_rule,
+                                    width,
+                                    height)
+                                    values({video_id},
+                                    {user_id},
+                                    "{out_user_id}",
+                                    "{platform}",
+                                    "{strategy}",
+                                    "{out_video_id}",
+                                    "{video_title}",
+                                    "{cover_url}",
+                                    "{video_url}",
+                                    {duration},
+                                    "{publish_time}",
+                                    {play_cnt},
+                                    {like_cnt},
+                                    {share_cnt},
+                                    {comment_cnt},
+                                    '{crawler_rule}',
+                                    {width},
+                                    {height}) """
+                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
+                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+
+
+
+if __name__ == "__main__":
+    Insert.insert_video_from_feishu_to_mysql("insert", "xigua", "dev", "local")

+ 0 - 3
xigua/xigua_follow/xigua_demo.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17

+ 164 - 89
xigua/xigua_follow/xigua_follow.py

@@ -1,10 +1,8 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2023/2/17
-import json
-
-from lxml import etree
 import base64
+import json
 import os
 import random
 import shutil
@@ -18,6 +16,7 @@ from selenium.webdriver import DesiredCapabilities
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from seleniumwire import webdriver
+from lxml import etree
 
 sys.path.append(os.getcwd())
 from common.db import MysqlHelper
@@ -35,12 +34,44 @@ class Follow:
     platform = "西瓜视频"
     tag = "西瓜视频爬虫,定向爬虫策略"
 
+    @classmethod
+    def get_rule(cls, log_type, crawler):
+        try:
+            while True:
+                rule_sheet = Feishu.get_values_batch(log_type, crawler, "4kxd31")
+                if rule_sheet is None:
+                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
+                    time.sleep(10)
+                    continue
+                rule_dict = {
+                    "play_cnt": int(rule_sheet[1][2]),
+                    "comment_cnt": int(rule_sheet[2][2]),
+                    "like_cnt": int(rule_sheet[3][2]),
+                    "duration": int(rule_sheet[4][2]),
+                    "publish_time": int(rule_sheet[5][2]),
+                    "video_width": int(rule_sheet[6][2]),
+                    "video_height": int(rule_sheet[7][2]),
+                }
+                return rule_dict
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
+
     # 下载规则
-    @staticmethod
-    def download_rule(duration, width, height):
-        if int(duration) >= 60:
-            if int(width) >= 720 or int(height) >= 720:
-                return True
+    @classmethod
+    def download_rule(cls, video_info_dict, rule_dict):
+        if video_info_dict['play_cnt'] >= rule_dict['play_cnt']:
+            if video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']:
+                if video_info_dict['like_cnt'] >= rule_dict['like_cnt']:
+                    if video_info_dict['duration'] >= rule_dict['duration']:
+                        if video_info_dict['video_width'] >= rule_dict['video_width'] \
+                                or video_info_dict['video_height'] >= rule_dict['video_height']:
+                            return True
+                        else:
+                            return False
+                    else:
+                        return False
+                else:
+                    return False
             else:
                 return False
         else:
@@ -50,15 +81,19 @@ class Follow:
     @classmethod
     def filter_words(cls, log_type, crawler):
         try:
-            filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
-            filter_words_list = []
-            for x in filter_words_sheet:
-                for y in x:
-                    if y is None:
-                        pass
-                    else:
-                        filter_words_list.append(y)
-            return filter_words_list
+            while True:
+                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
+                if filter_words_sheet is None:
+                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
+                    time.sleep(10)
+                    continue
+                filter_words_list = []
+                for x in filter_words_sheet:
+                    for y in x:
+                        if y is None:
+                            pass
+                        else:
+                            filter_words_list.append(y)
+                return filter_words_list
         except Exception as e:
             Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
 
@@ -100,53 +135,56 @@ class Follow:
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
 
-
     # 获取用户信息(字典格式). 注意:部分 user_id 字符类型是 int / str
     @classmethod
     def get_user_list(cls, log_type, crawler, sheetid, env, machine):
         try:
-            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-            our_user_list = []
-            for i in range(1, len(user_sheet)):
-                out_uid = user_sheet[i][2]
-                user_name = user_sheet[i][3]
-                our_uid = user_sheet[i][6]
-                our_user_link = user_sheet[i][7]
-                if out_uid is None or user_name is None:
-                    Common.logger(log_type, crawler).info("空行\n")
-                else:
-                    Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                    if our_uid is None:
-                        out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                        out_user_dict = {
-                            "out_uid": out_uid,
-                            "user_name": user_name,
-                            "out_avatar_url": out_user_info["out_avatar_url"],
-                            "out_create_time": '',
-                            "out_tag": '',
-                            "out_play_cnt": 0,
-                            "out_fans": out_user_info["out_fans"],
-                            "out_follow": out_user_info["out_follow"],
-                            "out_friend": 0,
-                            "out_like": out_user_info["out_like"],
-                            "platform": cls.platform,
-                            "tag": cls.tag,
-                        }
-                        our_user_dict = Users.create_user(log_type=log_type, crawler=crawler, out_user_dict=out_user_dict, env=env, machine=machine)
-                        our_uid = our_user_dict['our_uid']
-                        our_user_link = our_user_dict['our_user_link']
-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                        our_user_list.append(our_user_dict)
+            while True:
+                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+                if user_sheet is None:
+                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
+                    time.sleep(10)
+                    continue
+                our_user_list = []
+                for i in range(1, len(user_sheet)):
+                    out_uid = user_sheet[i][2]
+                    user_name = user_sheet[i][3]
+                    our_uid = user_sheet[i][6]
+                    our_user_link = user_sheet[i][7]
+                    if out_uid is None or user_name is None:
+                        Common.logger(log_type, crawler).info("空行\n")
                     else:
-                        our_user_dict = {
-                            'out_uid': out_uid,
-                            'user_name': user_name,
-                            'our_uid': our_uid,
-                            'our_user_link': our_user_link,
-                        }
-                        our_user_list.append(our_user_dict)
-            return our_user_list
+                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
+                        if our_uid is None:
+                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
+                            out_user_dict = {
+                                "out_uid": out_uid,
+                                "user_name": user_name,
+                                "out_avatar_url": out_user_info["out_avatar_url"],
+                                "out_create_time": '',
+                                "out_tag": '',
+                                "out_play_cnt": 0,
+                                "out_fans": out_user_info["out_fans"],
+                                "out_follow": out_user_info["out_follow"],
+                                "out_friend": 0,
+                                "out_like": out_user_info["out_like"],
+                                "platform": cls.platform,
+                                "tag": cls.tag,
+                            }
+                            our_user_dict = Users.create_user(log_type=log_type, crawler=crawler, out_user_dict=out_user_dict, env=env, machine=machine)
+                            our_uid = our_user_dict['our_uid']
+                            our_user_link = our_user_dict['our_user_link']
+                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
+                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
+                            our_user_list.append(our_user_dict)
+                        else:
+                            our_user_dict = {
+                                'out_uid': out_uid,
+                                'user_name': user_name,
+                                'our_uid': our_uid,
+                                'our_user_link': our_user_link,
+                            }
+                            our_user_list.append(our_user_dict)
+                return our_user_list
         except Exception as e:
             Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
 
@@ -695,7 +733,7 @@ class Follow:
                     if 'video_duration' not in videoList[i]:
                         video_duration = 0
                     else:
-                        video_duration = videoList[i]['video_duration']
+                        video_duration = int(videoList[i]['video_duration'])
 
                     # send_time
                     if 'publish_time' not in videoList[i]:
@@ -745,12 +783,20 @@ class Follow:
                     else:
                         cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0]['url']
 
+                    while True:
+                        rule_dict = cls.get_rule(log_type, crawler)
+                        if rule_dict is None:
+                            Common.logger(log_type, crawler).warning(f"rule_dict:{rule_dict}, 10秒后重试")
+                            time.sleep(10)
+                        else:
+                            break
+
                     if gid == 0 or video_id == 0 or cover_url == 0:
                         Common.logger(log_type, crawler).info('无效视频\n')
-                    elif is_top is True and int(time.time()) - int(publish_time) > 3600 * 24 * 10:
-                        Common.logger(log_type, crawler).info(f'置顶视频,且发布时间超过10天:{publish_time_str}\n')
-                    elif int(time.time()) - int(publish_time) > 3600 * 24 * 10:
-                        Common.logger(log_type, crawler).info(f'发布时间超过10天:{publish_time_str}\n')
+                    elif is_top is True and int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict['publish_time']:
+                        Common.logger(log_type, crawler).info(f'置顶视频,且发布时间:{publish_time_str}超过{rule_dict["publish_time"]}天\n')
+                    elif int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict['publish_time']:
+                        Common.logger(log_type, crawler).info(f'发布时间:{publish_time_str}超过{rule_dict["publish_time"]}天\n')
                         cls.offset = 0
                         return
                     else:
@@ -782,31 +828,40 @@ class Follow:
                                       'session': signature}
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        # cls.download_publish(log_type=log_type,
-                        #                      crawler=crawler,
-                        #                      video_dict=video_dict,
-                        #                      strategy=strategy,
-                        #                      our_uid=our_uid,
-                        #                      oss_endpoint=oss_endpoint,
-                        #                      env=env,
-                        #                      machine=machine)
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             video_dict=video_dict,
+                                             rule_dict=rule_dict,
+                                             strategy=strategy,
+                                             our_uid=our_uid,
+                                             oss_endpoint=oss_endpoint,
+                                             env=env,
+                                             machine=machine)
+
+    @classmethod
+    def repeat_video(cls, log_type, crawler, video_id, env, machine):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
+        return len(repeat_video)
 
     # 下载 / 上传
     @classmethod
-    def download_publish(cls, log_type, crawler, strategy, video_dict, our_uid, oss_endpoint, env, machine):
+    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         # try:
-        if cls.download_rule(video_dict['duration'], video_dict['video_width'], video_dict['video_height']) is False:
+        if cls.download_rule(video_dict, rule_dict) is False:
             Common.logger(log_type, crawler).info('不满足抓取规则\n')
         elif any(word if word in video_dict['video_title'] else False for word in cls.filter_words(log_type, crawler)) is True:
             Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
-        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
+        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
             Common.logger(log_type, crawler).info('视频已下载\n')
-        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
-            Common.logger(log_type, crawler).info('视频已下载\n')
-        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'QOWqMo') for x in y]:
-            Common.logger(log_type, crawler).info('视频已下载\n')
-        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
-            Common.logger(log_type, crawler).info('视频已存在\n')
+        # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
+        #     Common.logger(log_type, crawler).info('视频已下载\n')
+        # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
+        #     Common.logger(log_type, crawler).info('视频已下载\n')
+        # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'QOWqMo') for x in y]:
+        #     Common.logger(log_type, crawler).info('视频已下载\n')
+        # elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
+        #     Common.logger(log_type, crawler).info('视频已存在\n')
         else:
             # 下载封面
             Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
@@ -865,7 +920,7 @@ class Follow:
             Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
 
             # 视频信息保存数据库
-            sql = f""" insert into crawler_video(video_id,
+            insert_sql = f""" insert into crawler_video(video_id,
                             user_id,
                             out_user_id,
                             platform,
@@ -881,7 +936,7 @@ class Follow:
                             width,
                             height)
                             values({our_video_id},
-                            "{our_uid}",
+                            {our_uid},
                             "{video_dict['user_id']}",
                             "{cls.platform}",
                             "定向爬虫策略",
@@ -892,14 +947,34 @@ class Follow:
                             {int(video_dict['duration'])},
                             "{video_dict['publish_time_str']}",
                             {int(video_dict['play_cnt'])},
-                            "4,5,6",
+                            '{json.dumps(rule_dict)}',
                             {int(video_dict['video_width'])},
                             {int(video_dict['video_height'])}) """
-            MysqlHelper.update_values(log_type, crawler, sql, env, machine)
+            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
             Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
         # except Exception as e:
         #     Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
 
+    @classmethod
+    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
+        user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="5tlTYB", env=env, machine=machine)
+        for user in user_list:
+            out_uid = user["out_uid"]
+            user_name = user["user_name"]
+            our_uid = user["our_uid"]
+            Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
+            cls.get_videolist(log_type=log_type,
+                              crawler=crawler,
+                              strategy=strategy,
+                              our_uid=our_uid,
+                              out_uid=out_uid,
+                              oss_endpoint=oss_endpoint,
+                              env=env,
+                              machine=machine)
+            cls.offset = 0
+            time.sleep(3)
+
 
 if __name__ == '__main__':
     # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
@@ -912,8 +987,8 @@ if __name__ == '__main__':
     #                      env="dev",
     #                      machine="local")
     # print(Follow.random_signature())
-    user_list = Follow.get_user_list(log_type="follow", crawler="xigua", sheetid="5tlTYB", env="dev", machine="local")
-    print(len(user_list))
-    for user in user_list:
-        print(user)
+    rule = Follow.get_rule("follow", "xigua")
+    print(type(rule))
+    print(type(json.dumps(rule)))
+    print(json.dumps(rule))
     pass

+ 36 - 0
xigua/xigua_main/run_xigua_follow.py

@@ -1,3 +1,39 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2023/2/17
+import argparse
+import os
+import sys
+import time
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from xigua.xigua_follow.xigua_follow import Follow
+
+
+def main(log_type, crawler, strategy, oss_endpoint, env, machine):
+    while True:
+        Common.logger(log_type, crawler).info('开始抓取 西瓜视频 定向榜\n')
+        Follow.get_follow_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
+        Common.del_logs(log_type, crawler)
+        Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 分钟\n')
+        time.sleep(60)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # create the argument parser
+    parser.add_argument('--log_type', type=str)  # add each expected argument (type noted where needed)
+    parser.add_argument('--crawler')
+    parser.add_argument('--strategy')
+    parser.add_argument('--our_uid')
+    parser.add_argument('--oss_endpoint')
+    parser.add_argument('--env')
+    parser.add_argument('--machine')
+    args = parser.parse_args()  # parse the values passed on the command line
+    # print(args)
+    main(log_type=args.log_type,
+         crawler=args.crawler,
+         strategy=args.strategy,
+         oss_endpoint=args.oss_endpoint,
+         env=args.env,
+         machine=args.machine)
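
For local debugging, the new entry point can presumably also be started without the main.sh wrapper — a sketch assuming python3 is on PATH and the repository root is the working directory (the script adds os.getcwd() to sys.path), with the same flags the README shows for the local xigua run:
```
python3 ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="out" --env="dev" --machine="local"
```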

BIN
youtube/.DS_Store


+ 0 - 3
youtube/youtube_main/run_youtube_search.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/3