3 năm trước cách đây · 7f4d4d2725
--- a/README.md
+++ b/README.md
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/23
			
--- a/logs/__init__.py
+++ b/logs/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/23
			
--- a/main/__init__.py
+++ b/main/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/23
			
--- a/main/common.py
+++ b/main/common.py
@@ -0,0 +1,137 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/27
			
 
				+from datetime import date, timedelta
			
 
				+from loguru import logger
			
 
				+import datetime
			
 
				+import os
			
 
				+import time
			
 
				+import requests
			
 
				+import urllib3
			
 
				+
			
 
				+proxies = {"http": None, "https": None}
			
 
				+# proxies = {"http": "127.0.0.1:19180", "https": "127.0.0.1:19180"}
			
 
				+
			
 
				+
			
 
				+class Common:
			
 
				+    # 统一获取当前时间 <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
			
 
				+    now = datetime.datetime.now()
			
 
				+    # 昨天 <class 'str'>  2022-04-13
			
 
				+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
			
 
				+    # 今天 <class 'datetime.date'>  2022-04-14
			
 
				+    today = date.today()
			
 
				+    # 明天 <class 'str'>  2022-04-15
			
 
				+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
			
 
				+
			
 
				+    # 使用 logger 模块生成日志
			
 
				+    @staticmethod
			
 
				+    def logger():
			
 
				+        """
			
 
				+        使用 logger 模块生成日志
			
 
				+        """
			
 
				+        # 日志路径
			
 
				+        log_dir = "./logs/"
			
 
				+        log_path = os.getcwd() + os.sep + log_dir
			
 
				+        if not os.path.isdir(log_path):
			
 
				+            os.makedirs(log_path)
			
 
				+
			
 
				+        # 日志文件名
			
 
				+        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
			
 
				+
			
 
				+        # 日志不打印到控制台
			
 
				+        logger.remove(handler_id=None)
			
 
				+
			
 
				+        # rotation="500 MB"，实现每 500MB 存储一个文件
			
 
				+        # rotation="12:00"，实现每天 12:00 创建一个文件
			
 
				+        # rotation="1 week"，每周创建一个文件
			
 
				+        # retention="10 days"，每隔10天之后就会清理旧的日志
			
 
				+        # 初始化日志
			
 
				+        logger.add(log_dir + log_name, level="INFO", rotation='00:00')
			
 
				+
			
 
				+        return logger
			
 
				+
			
 
				+    # 清除日志，保留最近 7 个文件
			
 
				+    @classmethod
			
 
				+    def del_logs(cls, d_dir):
			
 
				+        """
			
 
				+        清除冗余日志文件
			
 
				+        :d_dir: 需要删除的 log 地址
			
 
				+        :return: 保留最近 7 个日志
			
 
				+        """
			
 
				+        if d_dir == "logs":
			
 
				+            logs_dir = "./logs/"
			
 
				+            all_files = sorted(os.listdir(logs_dir))
			
 
				+            all_logs = []
			
 
				+            for log in all_files:
			
 
				+                name = os.path.splitext(log)[-1]
			
 
				+                if name == ".log":
			
 
				+                    all_logs.append(log)
			
 
				+
			
 
				+            if len(all_logs) <= 7:
			
 
				+                pass
			
 
				+            else:
			
 
				+                for file in all_logs[:len(all_logs) - 7]:
			
 
				+                    os.remove(logs_dir + file)
			
 
				+            cls.logger().info("清除冗余日志成功")
			
 
				+
			
 
				+    # 封装下载视频或封面的方法
			
 
				+    @classmethod
			
 
				+    def download_method(cls, log_path, text, d_name, d_url):
			
 
				+        """
			
 
				+        下载封面：text == "cover" ； 下载视频：text == "video"
			
 
				+        需要下载的视频标题：d_title
			
 
				+        视频封面，或视频播放地址：d_url
			
 
				+        下载保存路径："./files/{d_title}/"
			
 
				+        """
			
 
				+        # 首先创建一个保存该视频相关信息的文件夹
			
 
				+        video_dir = "./videos/" + d_name + "/"
			
 
				+        # video_dir = "./videos/"
			
 
				+        if not os.path.exists(video_dir):
			
 
				+            os.mkdir(video_dir)
			
 
				+
			
 
				+        # 下载视频
			
 
				+        if text == "video":
			
 
				+            # 需要下载的视频地址
			
 
				+            video_url = d_url
			
 
				+            # 视频名
			
 
				+            video_name = "video.mp4"
			
 
				+            # # 视频名
			
 
				+            # video_name = d_name + ".mp4"
			
 
				+
			
 
				+            # 下载视频
			
 
				+            urllib3.disable_warnings()
			
 
				+            response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
			
 
				+            try:
			
 
				+                with open(video_dir + video_name, "wb") as f:
			
 
				+                    for chunk in response.iter_content(chunk_size=10240):
			
 
				+                        f.write(chunk)
			
 
				+                if log_path == "logs":
			
 
				+                    cls.logger().info("==========视频下载完成==========")
			
 
				+            except Exception as e:
			
 
				+                if log_path == "logs":
			
 
				+                    cls.logger().exception("视频下载失败：{}", e)
			
 
				+
			
 
				+        # 下载封面
			
 
				+        elif text == "cover":
			
 
				+            # 需要下载的封面地址
			
 
				+            cover_url = d_url
			
 
				+            # 封面名
			
 
				+            cover_name = "image.jpg"
			
 
				+            # # 封面名
			
 
				+            # cover_name = d_name + ".jpg"
			
 
				+
			
 
				+            # 下载封面
			
 
				+            urllib3.disable_warnings()
			
 
				+            response = requests.get(cover_url, proxies=proxies, verify=False)
			
 
				+            try:
			
 
				+                with open(video_dir + cover_name, "wb") as f:
			
 
				+                    f.write(response.content)
			
 
				+                if log_path == "logs":
			
 
				+                    cls.logger().info("==========封面下载完成==========")
			
 
				+            except Exception as e:
			
 
				+                if log_path == "logs":
			
 
				+                    cls.logger().exception("封面下载失败：{}", e)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    common = Common()
			
--- a/main/demo.py
+++ b/main/demo.py
@@ -0,0 +1,42 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/30
			
 
				+import time
			
 
				+from datetime import date
			
 
				+
			
 
				+from dateutil import parser
			
 
				+
			
 
				+from feishu_lib import Feishu
			
 
				+
			
 
				+# time1 = time.time()
			
 
				+# time2 = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time1))
			
 
				+# print(time2)
			
 
				+# print(type(time2))
			
 
				+
			
 
				+# # 前天 <class 'str'>  2022-04-15
			
 
				+# before_yesterday = (date.today() + timedelta(days=-2)).strftime("%Y-%m-%d")
			
 
				+# # 昨天 <class 'str'>  2022-04-13
			
 
				+# yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
			
 
				+# # 今天 <class 'datetime.date'>  2022-04-14
			
 
				+# today = date.today()
			
 
				+# print(before_yesterday)
			
 
				+# print(yesterday)
			
 
				+# print(today)
			
 
				+
			
 
				+# lists = Feishu.get_values_batch("twitter", "PZGpSZ")
			
 
				+# for i in lists:
			
 
				+#     for j in i:
			
 
				+#         # 过滤空的单元格内容
			
 
				+#         if j is None:
			
 
				+#             pass
			
 
				+#         elif "#" in j:
			
 
				+#             pass
			
 
				+#         else:
			
 
				+#             print(j)
			
 
				+
			
 
				+# date1 = parser.parse('null').strftime("%Y/%m/%d %H:%M:%S")
			
 
				+# print(date1)
			
 
				+time1 = time.time()
			
 
				+time.sleep(1)
			
 
				+time2 = time.time()
			
 
				+print(time2-time1)
			
--- a/main/feishu_lib.py
+++ b/main/feishu_lib.py
@@ -0,0 +1,338 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/27
			
 
				+import json
			
 
				+import time
			
 
				+
			
 
				+import requests
			
 
				+import urllib3
			
 
				+from common import Common
			
 
				+
			
 
				+proxies = {"http": None, "https": None}
			
 
				+
			
 
				+
			
 
				+class Feishu:
			
 
				+    """
			
 
				+    编辑飞书云文档
			
 
				+    """
			
 
				+    # 看一看爬虫数据表
			
 
				+    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
			
 
				+    # 快手爬虫数据表
			
 
				+    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?"
			
 
				+    # 微视爬虫数据表
			
 
				+    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
			
 
				+    # 小年糕爬虫数据表
			
 
				+    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
			
 
				+    # twitter 爬虫吧
			
 
				+    twitter_url = "https://whtlrai9ej.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?"
			
 
				+
			
 
				+    # 飞书路径token
			
 
				+    @classmethod
			
 
				+    def spreadsheettoken(cls, crawler):
			
 
				+        """
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        """
			
 
				+        if crawler == "kanyikan":
			
 
				+            return "shtcngRPoDYAi24x52j2nDuHMih"
			
 
				+        elif crawler == "kuaishou":
			
 
				+            return "shtcnp4SaJt37q6OOOrYzPMjQkg"
			
 
				+        elif crawler == "weishi":
			
 
				+            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
			
 
				+        elif crawler == "xiaoniangao":
			
 
				+            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
			
 
				+        elif crawler == "twitter":
			
 
				+            return "shtcn6BYfYuqegIP13ORB6rI2dh"
			
 
				+
			
 
				+    # 获取飞书api token
			
 
				+    @classmethod
			
 
				+    def get_token(cls):
			
 
				+        """
			
 
				+        获取飞书api token
			
 
				+        :return:
			
 
				+        """
			
 
				+        time.sleep(1)
			
 
				+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
			
 
				+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
			
 
				+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
			
 
				+
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
			
 
				+            tenant_access_token = response.json()["tenant_access_token"]
			
 
				+            return tenant_access_token
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("获取飞书 api token 异常:{}", e)
			
 
				+
			
 
				+    # 获取表格元数据
			
 
				+    @classmethod
			
 
				+    def get_metainfo(cls, crawler):
			
 
				+        """
			
 
				+        获取表格元数据
			
 
				+        :return:
			
 
				+        """
			
 
				+        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                           + cls.spreadsheettoken(crawler) + "/metainfo"
			
 
				+
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        params = {
			
 
				+            "extFields": "protectedRange",  # 额外返回的字段，extFields=protectedRange时返回保护行列信息
			
 
				+            "user_id_type": "open_id"  # 返回的用户id类型，可选open_id,union_id
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
			
 
				+            response = json.loads(r.content.decode("utf8"))
			
 
				+            return response
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("获取表格元数据异常:{}", e)
			
 
				+
			
 
				+    # 读取工作表中所有数据
			
 
				+    @classmethod
			
 
				+    def get_values_batch(cls, crawler, sheetid):
			
 
				+        """
			
 
				+        读取工作表中所有数据
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid: 哪张表
			
 
				+        :return: 所有数据
			
 
				+        """
			
 
				+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        params = {
			
 
				+            # 多个查询范围 如 url?ranges=range1,range2 ，其中 range 包含 sheetId 与单元格范围两部分
			
 
				+            "ranges": sheetid,
			
 
				+
			
 
				+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外)；
			
 
				+            # valueRenderOption=FormattedValue 计算并格式化单元格；
			
 
				+            # valueRenderOption=Formula单元格中含有公式时返回公式本身；
			
 
				+            # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
			
 
				+            "valueRenderOption": "ToString",
			
 
				+
			
 
				+            # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化，但不会对数字进行格式化，返回格式化后的字符串。
			
 
				+            "dateTimeRenderOption": "",
			
 
				+
			
 
				+            # 返回的用户id类型，可选open_id,union_id
			
 
				+            "user_id_type": "open_id"
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
			
 
				+            response = json.loads(r.content.decode("utf8"))
			
 
				+            values = response["data"]["valueRanges"][0]["values"]
			
 
				+            return values
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("读取工作表所有数据异常:{}", e)
			
 
				+
			
 
				+    # 工作表，插入行或列
			
 
				+    @classmethod
			
 
				+    def insert_columns(cls, crawler, sheetid, majordimension, startindex, endindex):
			
 
				+        """
			
 
				+        工作表插入行或列
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid:哪张工作表
			
 
				+        :param majordimension:行或者列
			
 
				+        :param startindex:开始位置
			
 
				+        :param endindex:结束位置
			
 
				+        """
			
 
				+        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        body = {
			
 
				+            "dimension": {
			
 
				+                "sheetId": sheetid,
			
 
				+                "majorDimension": majordimension,  # 默认 ROWS ，可选 ROWS、COLUMNS
			
 
				+                "startIndex": startindex,  # 开始的位置
			
 
				+                "endIndex": endindex  # 结束的位置
			
 
				+            },
			
 
				+            "inheritStyle": "AFTER"  # BEFORE 或 AFTER，不填为不继承 style
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
			
 
				+            Common.logger().info("插入行或列:{}", r.json()["msg"])
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("插入行或列异常:{}", e)
			
 
				+
			
 
				+    # 写入数据
			
 
				+    @classmethod
			
 
				+    def update_values(cls, crawler, sheetid, ranges, values):
			
 
				+        """
			
 
				+        写入数据
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid:哪张工作表
			
 
				+        :param ranges:单元格范围
			
 
				+        :param values:写入的具体数据，list
			
 
				+        """
			
 
				+        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        body = {
			
 
				+            "valueRanges": [
			
 
				+                {
			
 
				+                    "range": sheetid + "!" + ranges,
			
 
				+                    "values": values
			
 
				+                },
			
 
				+            ],
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
			
 
				+            Common.logger().info("写入数据:{}", r.json()["msg"])
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("写入数据异常:{}", e)
			
 
				+
			
 
				+    # 合并单元格
			
 
				+    @classmethod
			
 
				+    def merge_cells(cls, crawler, sheetid, ranges):
			
 
				+        """
			
 
				+        合并单元格
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid:哪张工作表
			
 
				+        :param ranges:需要合并的单元格范围
			
 
				+        """
			
 
				+        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                          + cls.spreadsheettoken(crawler) + "/merge_cells"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+
			
 
				+        body = {
			
 
				+            "range": sheetid + "!" + ranges,
			
 
				+            "mergeType": "MERGE_ROWS"
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
			
 
				+            Common.logger().info("合并单元格:{}", r.json()["msg"])
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("合并单元格异常:{}", e)
			
 
				+
			
 
				+    # 读取单元格数据
			
 
				+    @classmethod
			
 
				+    def get_range_value(cls, crawler, sheetid, cell):
			
 
				+        """
			
 
				+        读取单元格内容
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid: 哪张工作表
			
 
				+        :param cell: 哪个单元格
			
 
				+        :return: 单元格内容
			
 
				+        """
			
 
				+        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        params = {
			
 
				+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外)；
			
 
				+            # valueRenderOption=FormattedValue 计算并格式化单元格；
			
 
				+            # valueRenderOption=Formula 单元格中含有公式时返回公式本身；
			
 
				+            # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
			
 
				+            "valueRenderOption": "FormattedValue",
			
 
				+
			
 
				+            # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化，但不会对数字进行格式化，返回格式化后的字符串。
			
 
				+            "dateTimeRenderOption": "",
			
 
				+
			
 
				+            # 返回的用户id类型，可选open_id,union_id
			
 
				+            "user_id_type": "open_id"
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
			
 
				+            return r.json()["data"]["valueRange"]["values"][0]
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("读取单元格数据异常:{}", e)
			
 
				+
			
 
				+    # 删除行或列，可选 ROWS、COLUMNS
			
 
				+    @classmethod
			
 
				+    def dimension_range(cls, crawler, sheetid, major_dimension, startindex, endindex):
			
 
				+        """
			
 
				+        删除行或列
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid:工作表
			
 
				+        :param major_dimension:默认 ROWS ，可选 ROWS、COLUMNS
			
 
				+        :param startindex:开始的位置
			
 
				+        :param endindex:结束的位置
			
 
				+        :return:
			
 
				+        """
			
 
				+        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
			
 
				+                              + cls.spreadsheettoken(crawler) + "/dimension_range"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        body = {
			
 
				+            "dimension": {
			
 
				+                "sheetId": sheetid,
			
 
				+                "majorDimension": major_dimension,
			
 
				+                "startIndex": startindex,
			
 
				+                "endIndex": endindex
			
 
				+            }
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
			
 
				+            Common.logger().info("删除视频数据:{}", r.json()["msg"])
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("删除视频数据异常:{}", e)
			
 
				+
			
 
				+    # 查找单元格
			
 
				+    @classmethod
			
 
				+    def find_cell(cls, crawler, sheetid, find_text):
			
 
				+        """
			
 
				+        查找单元格
			
 
				+        :param crawler: 哪个爬虫
			
 
				+        :param sheetid: 哪张表
			
 
				+        # :param ranges: 单元格范围
			
 
				+        :param find_text: 查找的字符
			
 
				+        :return: 返回单元格索引
			
 
				+        """
			
 
				+        find_cell_url = "https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/" \
			
 
				+                        + cls.spreadsheettoken(crawler) + "/sheets/" \
			
 
				+                        + sheetid + "/find"
			
 
				+        headers = {
			
 
				+            "Authorization": "Bearer " + cls.get_token(),
			
 
				+            "Content-Type": "application/json; charset=utf-8"
			
 
				+        }
			
 
				+        rows_count = len(cls.get_values_batch("twitter", "db114c"))
			
 
				+        body = {
			
 
				+            "find_condition": {
			
 
				+                "range": sheetid+"!A1:A"+str(rows_count),
			
 
				+                "match_case": True,  # 是否忽略大小写
			
 
				+                "match_entire_cell": False,  # 是否匹配整个单元格
			
 
				+                "search_by_regex": False,  # 是否为正则匹配
			
 
				+                "include_formulas": False  # 是否搜索公式内容
			
 
				+            },
			
 
				+            "find": find_text  # 搜索内容
			
 
				+        }
			
 
				+        try:
			
 
				+            urllib3.disable_warnings()
			
 
				+            r = requests.post(url=find_cell_url, headers=headers, json=body, proxies=proxies, verify=False)
			
 
				+            Common.logger().info("查找单元格:{}", r.json()["msg"])
			
 
				+            matched_cell = r.json()["data"]["find_result"]["matched_cells"][0].split("A")[-1]
			
 
				+            return matched_cell
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("查找单元格异常:{}", e)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    feishu = Feishu()
			
 
				+    print(feishu.find_cell("twitter", "db114c", "956929025645035522"))
			
 
				+    print(type(feishu.find_cell("twitter", "db114c", "956929025645035522")))
			
 
				+
			
 
				+    pass
			
--- a/main/run.py
+++ b/main/run.py
@@ -0,0 +1,17 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/6/1
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.append(os.getcwd())
			
 
				+from search_by_words import Search
			
 
				+
			
 
				+
			
 
				+def main_pord():
			
 
				+    while True:
			
 
				+        Search.search_users_by_key_words()
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main_pord()
			
--- a/main/search_by_words.py
+++ b/main/search_by_words.py
@@ -0,0 +1,361 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2022/5/23
			
 
				+import os
			
 
				+import sys
			
 
				+import time
			
 
				+import requests
			
 
				+from datetime import date, timedelta
			
 
				+from dateutil import parser
			
 
				+
			
 
				+sys.path.append(os.getcwd())
			
 
				+from common import Common
			
 
				+from feishu_lib import Feishu
			
 
				+
			
 
				+proxies = {"http": "127.0.0.1:19180", "https": "127.0.0.1:19180"}
			
 
				+
			
 
				+
			
 
				+class Search:
			
 
    # NOTE: these dates are evaluated once, when the class body runs.
    # Day before yesterday <class 'str'>, e.g. 2022-04-12
    # (two days in the past; the offset must be negative)
    before_yesterday = (date.today() + timedelta(days=-2)).strftime("%Y-%m-%d")
    # Yesterday <class 'str'>, e.g. 2022-04-13
    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
    # Today <class 'datetime.date'>, e.g. 2022-04-14
    today = date.today()

    # Pagination cursor for the search request; while it is empty no
    # "&cursor=" parameter is added to the URL.
    cursor = ""
			
 
				+
			
 
				+    # 搜索词列表
			
 
				+    @classmethod
			
 
    def search_words(cls):
        """
        Load the search keywords from the Feishu sheet ("twitter", "PZGpSZ").

        Empty cells and cells containing "#" are skipped.

        :return: flat list of keyword cells; an empty list when the sheet
            could not be read
        """
        # Pause before hitting the Feishu API.
        time.sleep(1)
        lists = Feishu.get_values_batch("twitter", "PZGpSZ")
        # get_values_batch logs the error and returns None on failure;
        # iterating None would raise TypeError, so treat it as "no keywords".
        if not lists:
            return []

        word_list = []
        for row in lists:
            for cell in row:
                # Skip empty cells and cells marked with "#".
                if cell is None or "#" in cell:
                    continue
                word_list.append(cell)
        return word_list
			
 
				+
			
 
				+    # 更新用户信息
			
 
				+    @classmethod
			
 
    def update_user_info(cls, uid, key_word, values):
        """
        Refresh an existing user's row in the Feishu sheet ("twitter", "db114c").

        Columns C..U of the user's row are overwritten with ``values`` plus
        the stored creation time and the current update time. If ``key_word``
        is not yet among the comma-separated keywords in column B, it is
        appended there as well.

        :param uid: user id to look up in column A
        :param key_word: the search keyword that found this user
        :param values: row values for columns C onward; NOTE: this list is
            extended in place with the creation and update timestamps
        :return: None; any exception is logged, not raised
        """
        try:
            if len(Feishu.get_values_batch("twitter", "db114c")) == 1:
                Common.logger().info("无用户信息")
                return

            time.sleep(1)
            i = Feishu.find_cell("twitter", "db114c", uid)
            if i is None:
                # find_cell already logged the failure; without a row number
                # the ranges below would become "BNone:BNone".
                return
            row = str(i)
            user_words = Feishu.get_range_value("twitter", "db114c", "B" + row + ":" + "B" + row)
            user_create_time = Feishu.get_range_value("twitter", "db114c", "T" + row + ":" + "T" + row)[0]
            user_update_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time()))

            # Column B holds the keywords joined by ",". Compare against the
            # individual words: comparing against the whole cell would treat
            # every keyword as new once the cell holds more than one word.
            stored_words = "" if user_words[0] is None else str(user_words[0])
            known_words = [word.strip() for word in stored_words.split(",") if word.strip()]
            already_known = key_word in known_words

            if already_known:
                Common.logger().info("满足条件:key_word已存在，更新当前用户信息:{}", uid)
            else:
                Common.logger().info("满足条件:key_word不存在，更新当前用户信息:{}", uid)

            # Update everything except the keyword column first.
            time.sleep(1)
            values.append(user_create_time)
            values.append(user_update_time)
            Common.logger().info("values:{}", values)
            Feishu.update_values("twitter", "db114c", "C" + row + ":" + "U" + row, [values])
            Common.logger().info("用户:{}信息更新成功", uid)
            if already_known:
                return

            # Then append the new keyword to column B.
            time.sleep(1)
            words = stored_words + "," + key_word if stored_words else key_word
            Feishu.update_values("twitter", "db114c", "B" + row + ":" + "B" + row,
                                 [[words]])
            Common.logger().info("用户key_word:{}更新成功", key_word)
        except Exception as e:
            Common.logger().error("更新用户信息异常:{}", e)
			
 
				+
			
 
				+    # 根据关键字搜索
			
 
				+    @classmethod
			
 
				+    def search_users_v2(cls, key_word):
			
 
				+        try:
			
 
				+            cursor_params = ''
			
 
				+            if len(cls.cursor) > 0:
			
 
				+                cursor_params = '&cursor={}'.format(cls.cursor)
			
 
				+            # 搜索最近三天的数据
			
 
				+            # url = "https://twitter.com/i/api/2/search/adaptive.json?" \
			
 
				+            #       "include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&" \
			
 
				+            #       "include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&" \
			
 
				+            #       "include_can_media_tag=1&include_ext_has_nft_avatar=1&skip_status=1&" \
			
 
				+            #       "cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&" \
			
 
				+            #       "include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&" \
			
 
				+            #       "include_ext_media_color=true&include_ext_media_availability=true&" \
			
 
				+            #       "include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&" \
			
 
				+            #       "send_error_codes=true&simple_quoted_tweet=true&" \
			
 
				+            #       "q=(" + key_word + ")%20until%3A" + str(cls.today) + "%20since%3A" + str(cls.before_yesterday) + \
			
 
				+            #       "&result_filter=user&count=20&query_source=typed_query" + cursor_params + \
			
 
				+            #       "&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2" \
			
 
				+            #       "Cenrichments%2CsuperFollowMetadata%2CunmentionInfo"
			
 
				+
			
 
				+            url = "https://twitter.com/i/api/2/search/adaptive.json?" \
			
 
				+                  "include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&" \
			
 
				+                  "include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&" \
			
 
				+                  "include_can_media_tag=1&include_ext_has_nft_avatar=1&skip_status=1&" \
			
 
				+                  "cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&" \
			
 
				+                  "include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&" \
			
 
				+                  "include_ext_media_color=true&include_ext_media_availability=true&" \
			
 
				+                  "include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&" \
			
 
				+                  "send_error_codes=true&simple_quoted_tweet=true&" \
			
 
				+                  "q=" + key_word + \
			
 
				+                  "&result_filter=user&count=20&query_source=typed_query" + cursor_params + \
			
 
				+                  "&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2" \
			
 
				+                  "Cenrichments%2CsuperFollowMetadata%2CunmentionInfo"
			
 
				+            headers = {
			
 
				+                'authority': 'twitter.com',
			
 
				+                'accept': '*/*',
			
 
				+                'accept-language': 'zh-CN,zh;q=0.9',
			
 
				+                'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz'
			
 
				+                                 '4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
			
 
				+                'cookie': 'guest_id_marketing=v1%3A164691584304284451; guest_id_ads=v1%3A164691584304284451;'
			
 
				+                          ' kdt=RGGgmMi1qsAE8ap8NlKFjpksuDXG9gdD1utIeK0u; des_opt_in=Y; _gcl_au=1.1.1066'
			
 
				+                          '77612.1647418528;'
			
 
				+                          ' g_state={"i_l":0}; _gid=GA1.2.645428048.1652699425;'
			
 
				+                          ' personalization_id="v1_zSZMfoG7rsTlMHQYwOA39Q=="; guest_id=v1%3A165294843395764407;'
			
 
				+                          ' auth_token=592dbe3e68ce355f31f8343d700215030fbcd817;'
			
 
				+                          ' ct0=df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f7f39a447'
			
 
				+                          'b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c1056545ab007de55be;'
			
 
				+                          ' twid=u%3D1501900092303101953; external_referer=padhuUp37zg6GVaBnLSoCA0layDKYA'
			
 
				+                          'Tn|0|8e8t2xd8A2w%3D; mbox=PC#3ffa21b420af400ca9e94d2b1b72525c.32_0#1716385856|s'
			
 
				+                          'ession#047c8af8f5e34fa585b247e05c6f0a6b#1653142916; _ga=GA1.2.659870250.1646915849;'
			
 
				+                          ' _ga_BYKEBDM7DS=GS1.1.1653201242.12.0.1653201242.0; _ga_34PHSZMC42=GS1.1.1653201242.5'
			
 
				+                          '8.0.1653201242.0; lang=zh-cn; _twitter_sess=BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6R'
			
 
				+                          'mxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCMQBs%252BqAAToMY3NyZl9p%250AZC'
			
 
				+                          'IlYjJkNWIyOTZiMzhmMGVlNWM1NDY0MmUyNDM5NTJkNjg6B2lkIiVkZjNl%250AMWNkNTY5OTUwNDdiYzgzNDE1NG'
			
 
				+                          'UyNjA3ZWU1NA%253D%253D--b3450fa2f7a9503c9e5e8356aff22570d29a7912; guest_id=v1%3A16479480474'
			
 
				+                          '0239293; guest_id_ads=v1%3A164794804740239293; guest_id_marketing=v1%3A164794804740239293;'
			
 
				+                          ' personalization_id="v1_/1LnzKXLyeYnZl13Ri62bg=="',
			
 
				+                # 搜索最近三天的
			
 
				+                # 'referer': "https://twitter.com/search?q=(" + key_word + ")%20until%3A" + str(cls.today) +
			
 
				+                #            "%20since%3A" + str(cls.before_yesterday) + "&src=typed_query&f=user",
			
 
				+                'referer': "https://twitter.com/search?q=" + key_word + "&src=typed_query&f=user",
			
 
				+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
			
 
				+                'sec-ch-ua-mobile': '?0',
			
 
				+                'sec-ch-ua-platform': '"macOS"',
			
 
				+                'sec-fetch-dest': 'empty',
			
 
				+                'sec-fetch-mode': 'cors',
			
 
				+                'sec-fetch-site': 'same-origin',
			
 
				+                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
			
 
				+                              ' Chrome/101.0.4951.64 Safari/537.36',
			
 
				+                'x-csrf-token': 'df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f'
			
 
				+                                '7f39a447b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c10565'
			
 
				+                                '45ab007de55be',
			
 
				+                'x-twitter-active-user': 'yes',
			
 
				+                'x-twitter-auth-type': 'OAuth2Session',
			
 
				+                'x-twitter-client-language': 'zh-cn'
			
 
				+            }
			
 
				+            r = requests.get(url=url, headers=headers, proxies=proxies)
			
 
				+            # Common.logger().info("response:{}", r.text)
			
 
				+            cls.cursor = r.json()["timeline"]["instructions"][-1]["addEntries"][
			
 
				+                "entries"][-1]["content"]["operation"]["cursor"]["value"]
			
 
				+            # Common.logger().info("cursor:{}", cls.cursor)
			
 
				+            users = r.json()["globalObjects"]["users"]
			
 
				+            if len(users) == 0:
			
 
				+                Common.logger().info("本次请求无数据返回")
			
 
				+                return
			
 
				+            else:
			
 
				+                userid_list = []
			
 
				+                for userid in users:
			
 
				+                    userid_list.append(userid)
			
 
				+                for userinfo in userid_list:
			
 
				+                    userinfo = users[userinfo]
			
 
				+
			
 
				+                    if "id_str" in userinfo:
			
 
				+                        uid = userinfo["id_str"]
			
 
				+                    else:
			
 
				+                        uid = "null"
			
 
				+
			
 
				+                    if "name" in userinfo:
			
 
				+                        name = userinfo["name"]
			
 
				+                    else:
			
 
				+                        name = "null"
			
 
				+
			
 
				+                    if "screen_name" in userinfo:
			
 
				+                        screen_name = userinfo["screen_name"]
			
 
				+                    else:
			
 
				+                        screen_name = "null"
			
 
				+
			
 
				+                    if screen_name == "null":
			
 
				+                        person_url = "null"
			
 
				+                    else:
			
 
				+                        person_url = "https://twitter.com/" + screen_name
			
 
				+
			
 
				+                    if "description" in userinfo:
			
 
				+                        description = userinfo["description"]
			
 
				+                    else:
			
 
				+                        description = "null"
			
 
				+
			
 
				+                    if "location" in userinfo:
			
 
				+                        location = userinfo["location"]
			
 
				+                    else:
			
 
				+                        location = "null"
			
 
				+
			
 
				+                    if "friends_count" in userinfo:
			
 
				+                        friends_count = userinfo["friends_count"]
			
 
				+                    else:
			
 
				+                        friends_count = "null"
			
 
				+
			
 
				+                    if "followers_count" in userinfo:
			
 
				+                        followers_count = userinfo["followers_count"]
			
 
				+                    else:
			
 
				+                        followers_count = "null"
			
 
				+
			
 
				+                    if "favourites_count" in userinfo:
			
 
				+                        favourites_count = userinfo["favourites_count"]
			
 
				+                    else:
			
 
				+                        favourites_count = "null"
			
 
				+
			
 
				+                    if "listed_count" in userinfo:
			
 
				+                        listed_count = userinfo["listed_count"]
			
 
				+                    else:
			
 
				+                        listed_count = "null"
			
 
				+
			
 
				+                    if "statuses_count" in userinfo:
			
 
				+                        statuses_count = userinfo["statuses_count"]
			
 
				+                    else:
			
 
				+                        statuses_count = "null"
			
 
				+
			
 
				+                    if "media_count" in userinfo:
			
 
				+                        media_count = userinfo["media_count"]
			
 
				+                    else:
			
 
				+                        media_count = "null"
			
 
				+
			
 
				+                    if "entities" not in userinfo:
			
 
				+                        display_url = "null"
			
 
				+                    elif "url" not in userinfo["entities"]:
			
 
				+                        display_url = "null"
			
 
				+                    elif "display_url" in userinfo["entities"]["url"]["urls"][0]:
			
 
				+                        display_url = userinfo["entities"]["url"]["urls"][0]["display_url"]
			
 
				+                    elif "expanded_url" in userinfo["entities"]["url"]["urls"][0]:
			
 
				+                        display_url = userinfo["entities"]["url"]["urls"][0]["expanded_url"]
			
 
				+                    elif "url" in userinfo["entities"]["url"]["urls"][0]:
			
 
				+                        display_url = userinfo["entities"]["url"]["urls"][0]["url"]
			
 
				+                    else:
			
 
				+                        display_url = "null"
			
 
				+
			
 
				+                    if "created_at" in userinfo:
			
 
				+                        created_at1 = userinfo["created_at"]
			
 
				+                        created_at = str(parser.parse(created_at1).strftime("%Y/%m/%d %H:%M:%S"))
			
 
				+                    else:
			
 
				+                        created_at = "null"
			
 
				+
			
 
				+                    if "profile_image_url" in userinfo:
			
 
				+                        profile_image_url = userinfo["profile_image_url"]
			
 
				+                    else:
			
 
				+                        profile_image_url = "null"
			
 
				+
			
 
				+                    if "profile_banner_url" in userinfo:
			
 
				+                        profile_banner_url = userinfo["profile_banner_url"]
			
 
				+                    else:
			
 
				+                        profile_banner_url = "null"
			
 
				+
			
 
				+                    if "ext_has_nft_avatar" in userinfo:
			
 
				+                        ext_has_nft_avatar = userinfo["ext_has_nft_avatar"]
			
 
				+                    else:
			
 
				+                        ext_has_nft_avatar = "null"
			
 
				+
			
 
				+                    if "verified" in userinfo:
			
 
				+                        verified = userinfo["verified"]
			
 
				+                    else:
			
 
				+                        verified = "null"
			
 
				+
			
 
				+                    # 过滤无效用户
			
 
				+                    if uid == "" or uid == "null":
			
 
				+                        Common.logger().info("无效用户")
			
 
				+
			
 
				+                    # 用户已存在云文档中
			
 
				+                    elif uid in [j for i in Feishu.get_values_batch("twitter", "db114c") for j in i]:
			
 
				+                        Common.logger().info("用户已存在:{}", uid)
			
 
				+                        time.sleep(1)
			
 
				+                        values = [str(name),
			
 
				+                                  str(screen_name),
			
 
				+                                  str(person_url),
			
 
				+                                  str(description),
			
 
				+                                  str(location),
			
 
				+                                  int(friends_count),
			
 
				+                                  int(followers_count),
			
 
				+                                  int(favourites_count),
			
 
				+                                  int(listed_count),
			
 
				+                                  int(statuses_count),
			
 
				+                                  int(media_count),
			
 
				+                                  str(display_url),
			
 
				+                                  str(created_at),
			
 
				+                                  str(profile_image_url),
			
 
				+                                  str(profile_banner_url),
			
 
				+                                  str(ext_has_nft_avatar),
			
 
				+                                  str(verified)]
			
 
				+                        cls.update_user_info(uid, key_word, values)
			
 
				+
			
 
				+                    # 用户未存在云文档中
			
 
				+                    else:
			
 
				+                        Common.logger().info("添加用户:{} 至云文档", name)
			
 
				+                        create_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time()))
			
 
				+                        update_time = ""
			
 
				+
			
 
				+                        # 云文档插入行:https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c
			
 
				+                        Feishu.insert_columns("twitter", "db114c", "ROWS", 1, 2)
			
 
				+                        # 云文档写入数据:https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c
			
 
				+                        values = [[str(uid),
			
 
				+                                   str(key_word),
			
 
				+                                   str(name),
			
 
				+                                   str(screen_name),
			
 
				+                                   str(person_url),
			
 
				+                                   str(description),
			
 
				+                                   str(location),
			
 
				+                                   int(friends_count),
			
 
				+                                   int(followers_count),
			
 
				+                                   int(favourites_count),
			
 
				+                                   int(listed_count),
			
 
				+                                   int(statuses_count),
			
 
				+                                   int(media_count),
			
 
				+                                   str(display_url),
			
 
				+                                   str(created_at),
			
 
				+                                   str(profile_image_url),
			
 
				+                                   str(profile_banner_url),
			
 
				+                                   str(ext_has_nft_avatar),
			
 
				+                                   str(verified),
			
 
				+                                   str(create_time),
			
 
				+                                   str(update_time)]]
			
 
				+                        time.sleep(1)
			
 
				+                        Feishu.update_values("twitter", "db114c", "A2:U2", values)
			
 
				+                        Common.logger().info("添加成功\n")
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            Common.logger().error("搜索用户异常:{}", e)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def search_users_by_key_words(cls):
			
 
				+        for key_word in cls.search_words():
			
 
				+            Common.logger().info("根据关键词:{} 搜索用户", key_word)
			
 
				+            cls.cursor = ''
			
 
				+            time.sleep(1)
			
 
				+            start = time.time()
			
 
				+            for i in range(200):
			
 
				+                Common.logger().info("正在请求第{}页", i+1)
			
 
				+                cls.search_users_v2(key_word)
			
 
				+            end_time = time.time()
			
 
				+            Common.logger().info("本次根据{}关键词搜索, 共耗时:{}秒", key_word, int(end_time-start))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # Script entry point: this branch runs only when the file is executed directly.
			
 
				+    search = Search()
			
 
				+    # Disabled call that searches a single key word ("web3").
			
 
				+    # search.search_users("web3")
			
 
				+    # Run the search for every key word provided by search_words().
			
 
				+    search.search_users_by_key_words()