wangkun пре 2 година
родитељ
комит
2d53f71d37
7 измењених фајлова са 722 додато и 204 уклоњено
  1. 6 0
      README.md
  2. 0 154
      main/copy_usersinfo.py
  3. 59 33
      main/demo.py
  4. 564 5
      main/feishu_lib.py
  5. 1 7
      main/run.py
  6. 46 5
      main/search_by_words.py
  7. 46 0
      main/sort_lib.py

+ 6 - 0
README.md

@@ -8,3 +8,9 @@ urllib3==1.26.9
 
 1.必须要可以访问 twitter 的网络条件
 2.执行入口 ./main/run.py
+
+#********** 2022/06/24 **********#
+1.爬虫数据启用新文档:https://w42nne6hzg.feishu.cn/base/bascnpAYvIA0B1hBtNJlriZceUV?table=tblqMbXrpqFbDLNE&view=vewsMtek0O
+2.旧文档依然使用:https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c&table=tblEUZWvkiLPEmam&view=vew2a2J1NI
+3.每个搜索词,搜索 20 条/页,共搜索 400 页
+4.新文档插入数据后,自动按照 created_at 字段降序排列

+ 0 - 154
main/copy_usersinfo.py

@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/6/20
-import time
-
-import requests
-import urllib3
-from main.common import Common
-from main.feishu_lib import Feishu
-proxies = {"http": None, "https": None}
-
-
-class CopyUsersInfo:
-
-    # 备份表数据
-    @classmethod
-    def copyed_data(cls):
-        try:
-            data_list = []
-            lists = Feishu.get_values_batch("twitter", "WPJILC")
-            for i in lists:
-                for j in i:
-                    # 过滤空的单元格内容
-                    if j is None:
-                        pass
-                    else:
-                        data_list.append(j)
-            return data_list
-        except Exception as e:
-            Common.logger().error("获取备份表数据异常:{}", e)
-
-    # 增加工作表,复制工作表、删除工作表。
-    @classmethod
-    def sheets_batch_update(cls):
-        """
-        https://open.feishu.cn/document/ukTMukTMukTM/uYTMzUjL2EzM14iNxMTN
-        """
-        url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/shtcn6BYfYuqegIP13ORB6rI2dh/sheets_batch_update"
-        headers = {
-            "Authorization": "Bearer " + Feishu.get_token(),
-            "Content-Type": "application/json; charset=utf-8"
-        }
-        body = {
-            "requests": [
-                {
-                    "copySheet": {
-                        "source": {
-                            "sheetId": "db114c"
-                        },
-                        "destination": {
-                            "title": ""
-                        }
-                    }
-                }
-            ]
-        }
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
-            print(r.json())
-            # Common.logger().info("增加工作表,复制工作表、删除工作表:{}", r.json()["msg"])
-        except Exception as e:
-            Common.logger().error("增加工作表,复制工作表、删除工作表异常:{}", e)
-
-    # 复制用户信息
-    @classmethod
-    def copy_usersinfo(cls):
-        try:
-            user_list = Feishu.get_values_batch("twitter", "db114c")
-            for i in range(1, len(user_list[1:])):
-                uid = user_list[i][0]
-                key_word = user_list[i][1]
-                name = user_list[i][2]
-                screen_name = user_list[i][3]
-                person_url = user_list[i][4]
-                description = user_list[i][5]
-                location = user_list[i][6]
-                friends_count = user_list[i][7]
-                followers_count = user_list[i][8]
-                favourites_count = user_list[i][9]
-                listed_count = user_list[i][10]
-                statuses_count = user_list[i][11]
-                media_count = user_list[i][12]
-                display_url = user_list[i][13]
-                created_at = user_list[i][14]
-                profile_image_url = user_list[i][15]
-                profile_banner_url = user_list[i][16]
-                ext_has_nft_avatar = user_list[i][17]
-                verified = user_list[i][18]
-                created_time = user_list[i][19]
-                update_time = user_list[i][20]
-                # print(f"\n第{i}个用户信息")
-                # print(f"uid:{uid}")
-                # print(f"key_word:{key_word}")
-                # print(f"name:{name}")
-                # print(f"screen_name:{screen_name}")
-                # print(f"person_url:{person_url}")
-                # print(f"description:{description}")
-                # print(f"location:{location}")
-                # print(f"friends_count:{friends_count}")
-                # print(f"followers_count:{followers_count}")
-                # print(f"favourites_count:{favourites_count}")
-                # print(f"listed_count:{listed_count}")
-                # print(f"statuses_count:{statuses_count}")
-                # print(f"media_count:{media_count}")
-                # print(f"display_url:{display_url}")
-                # print(f"created_at:{created_at}")
-                # print(f"profile_image_url:{profile_image_url}")
-                # print(f"profile_banner_url:{profile_banner_url}")
-                # print(f"ext_has_nft_avatar:{ext_has_nft_avatar}, type:{type(ext_has_nft_avatar)}")
-                # print(f"verified:{verified}, type:{type(verified)}")
-                # print(f"created_time:{created_time}")
-                # print(f"update_time:{update_time}")
-                # print("\n")
-
-                if uid in cls.copyed_data():
-                    Common.logger().info("用户信息已存在")
-                    return
-                else:
-                    Common.logger().info("开始拷贝第{}个用户信息", i)
-                    time.sleep(1)
-                    Feishu.insert_columns("twitter", "WPJILC", "ROWS", 1, 2)
-                    values = [[uid,
-                               key_word,
-                               name,
-                               screen_name,
-                               person_url,
-                               description,
-                               location,
-                               friends_count,
-                               followers_count,
-                               favourites_count,
-                               listed_count,
-                               statuses_count,
-                               media_count,
-                               display_url,
-                               created_at,
-                               profile_image_url,
-                               profile_banner_url,
-                               str(ext_has_nft_avatar),
-                               str(verified),
-                               created_time,
-                               update_time]]
-                    time.sleep(1)
-                    Feishu.update_values("twitter", "WPJILC", "A2:U2", values)
-
-        except Exception as e:
-            Common.logger().error("复制用户信息异常:{}", e)
-
-
-if __name__ == "__main__":
-    copy = CopyUsersInfo()
-    copy.sheets_batch_update()
-    # copy.copy_usersinfo()

+ 59 - 33
main/demo.py

@@ -1,42 +1,68 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/5/30
+import json
 import time
+import requests
 from datetime import date
 
+import urllib3
 from dateutil import parser
-
 from feishu_lib import Feishu
 
-# time1 = time.time()
-# time2 = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time1))
-# print(time2)
-# print(type(time2))
-
-# # 前天 <class 'str'>  2022-04-15
-# before_yesterday = (date.today() + timedelta(days=-2)).strftime("%Y-%m-%d")
-# # 昨天 <class 'str'>  2022-04-13
-# yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
-# # 今天 <class 'datetime.date'>  2022-04-14
-# today = date.today()
-# print(before_yesterday)
-# print(yesterday)
-# print(today)
-
-# lists = Feishu.get_values_batch("twitter", "PZGpSZ")
-# for i in lists:
-#     for j in i:
-#         # 过滤空的单元格内容
-#         if j is None:
-#             pass
-#         elif "#" in j:
-#             pass
-#         else:
-#             print(j)
-
-# date1 = parser.parse('null').strftime("%Y/%m/%d %H:%M:%S")
-# print(date1)
-time1 = time.time()
-time.sleep(1)
-time2 = time.time()
-print(time2-time1)
+
+class Demo:
+    """Scratch class holding small time-formatting experiments that print their results."""
+    action = 0
+
+    @classmethod
+    def times(cls):
+        """Print the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
+        now_struct = time.localtime(time.time())
+        print(time.strftime("%Y-%m-%d %H:%M:%S", now_struct))
+
+        # # The day before yesterday <class 'str'>  2022-04-15
+        # before_yesterday = (date.today() + timedelta(days=-2)).strftime("%Y-%m-%d")
+        # # Yesterday <class 'str'>  2022-04-13
+        # yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+        # # Today <class 'datetime.date'>  2022-04-14
+        # today = date.today()
+        # print(before_yesterday)
+        # print(yesterday)
+        # print(today)
+
+        # lists = Feishu.get_values_batch("twitter", "PZGpSZ")
+        # for i in lists:
+        #     for j in i:
+        #         # Skip empty cells
+        #         if j is None:
+        #             pass
+        #         elif "#" in j:
+        #             pass
+        #         else:
+        #             print(j)
+
+        # date1 = parser.parse('null').strftime("%Y/%m/%d %H:%M:%S")
+        # print(date1)
+        # time1 = time.time()
+        # time.sleep(1)
+        # time2 = time.time()
+        # print(time2 - time1)
+
+    @classmethod
+    def created_at(cls):
+        """
+        Print a sample Twitter ``created_at`` value and the current time, each as a
+        ``YYYY/MM/DD HH:MM:SS`` string followed by its millisecond timestamp.
+        """
+        stamp_format = "%Y/%m/%d %H:%M:%S"
+
+        def to_millis(text):
+            # Parse the formatted string back and scale the whole-second epoch to milliseconds.
+            # NOTE(review): the formatted string carries no UTC offset and time.mktime()
+            # interprets it as local time, so the sample value below is shifted by the
+            # local UTC offset - confirm this is intended.
+            return int(time.mktime(time.strptime(text, stamp_format))) * 1000
+
+        sample = "Thu Feb 06 05:03:51 +0000 2014"
+        sample_text = str(parser.parse(sample).strftime(stamp_format))
+        print(sample_text)
+        print(to_millis(sample_text))
+
+        now_text = time.strftime(stamp_format, time.localtime(time.time()))
+        print(now_text)
+        print(to_millis(now_text))
+
+
+if __name__ == "__main__":
+    # created_at is a classmethod, so it is invoked on the class without creating an instance.
+    Demo.created_at()

+ 564 - 5
main/feishu_lib.py

@@ -312,7 +312,7 @@ class Feishu:
         rows_count = len(cls.get_values_batch("twitter", "db114c"))
         body = {
             "find_condition": {
-                "range": sheetid+"!A1:A"+str(rows_count),
+                "range": sheetid + "!A1:A" + str(rows_count),
                 "match_case": True,  # 是否忽略大小写
                 "match_entire_cell": False,  # 是否匹配整个单元格
                 "search_by_regex": False,  # 是否为正则匹配
@@ -329,10 +329,569 @@ class Feishu:
         except Exception as e:
             Common.logger().error("查找单元格异常:{}", e)
 
+    # Filter: apply a filter condition to a sheet
+    @classmethod
+    def filter_created_at(cls):
+        """
+        Send a PUT request to the sheets v3 ``filter`` endpoint of spreadsheet
+        ``shtcn8fFzDhCFHpB6vzf51s2xbf``, sheet ``48cfb0``, with a hard-coded
+        condition (column A, number, less than 6) and print the JSON response.
+
+        Request failures are logged through Common.logger(); nothing is returned.
+        """
+        filter_created_at_url = "https://open.feishu.cn/open-apis/sheets/v3/spreadsheets/" \
+                                "shtcn8fFzDhCFHpB6vzf51s2xbf/sheets/48cfb0/filter"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "col": "A",
+            "condition": {
+                "filter_type": "number",
+                "compare_type": "less",
+                "expected": [
+                    "6"
+                ]
+            }
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.put(url=filter_created_at_url, headers=headers, json=body, proxies=proxies, verify=False)
+            print(r.json())
+        except Exception as e:
+            # The message previously read "查找单元格异常" (find-cell error), copied from
+            # find_cell, which mislabelled filter failures in the log.
+            Common.logger().error("筛选异常:{}", e)
+
+
+class Bitable:
+    """
+    Feishu Bitable (multi-dimensional table) API wrapper.
+    Document: https://w42nne6hzg.feishu.cn/base/bascnpAYvIA0B1hBtNJlriZceUV?table=tblqMbXrpqFbDLNE&view=vewsMtek0O
+    app_token: bascnpAYvIA0B1hBtNJlriZceUV
+
+    Every request method is best-effort: any exception raised while building the
+    auth headers, sending the request or reading the response is logged through
+    Common.logger(), and the method then returns None.
+    """
+    app_token = "bascnpAYvIA0B1hBtNJlriZceUV"
+    table_id = "tblqMbXrpqFbDLNE"
+    page_token = ""  # paging cursor; overwritten by list_records
+
+    # Get the Feishu API token
+    @classmethod
+    def tenant_access_token(cls):
+        """
+        Request a tenant_access_token from the Feishu auth endpoint.
+        Sleeps one second before sending the request.
+        :return: the token, or None when the request or response parsing fails (the error is logged)
+        """
+        time.sleep(1)
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        # These are the credentials of the published app's admin console.
+        # NOTE(review): the secret is hard-coded in source; consider loading it from configuration.
+        post_data = {"app_id": "cli_a13ad2afa438d00b",
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+
+        try:
+            urllib3.disable_warnings()
+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+            tenant_access_token = response.json()["tenant_access_token"]
+            return tenant_access_token
+        except Exception as e:
+            Common.logger().error("获取tenant_access_token异常:{}", e)
+
+    # Build the request headers shared by every Bitable call
+    @classmethod
+    def _auth_headers(cls):
+        """
+        Return the Authorization / Content-Type headers for a Bitable request.
+        :raises RuntimeError: when tenant_access_token() yields no token; callers invoke
+            this inside their try block so the failure is logged instead of surfacing
+            as a TypeError from string concatenation with None
+        """
+        token = cls.tenant_access_token()
+        if not token:
+            raise RuntimeError("tenant_access_token is unavailable")
+        return {
+            "Authorization": "Bearer " + token,
+            "Content-Type": "application/json; charset=utf-8"
+        }
+
+    # Build a URL under this table's records collection
+    @classmethod
+    def _records_url(cls, suffix=""):
+        """Return the records endpoint of the configured app/table, followed by suffix."""
+        return "https://open.feishu.cn/open-apis/bitable/v1/apps/" \
+               + cls.app_token + "/tables/" + cls.table_id + "/records" + suffix
+
+    # Get the Bitable app metadata
+    @classmethod
+    def get_apps(cls):
+        """
+        Get the Bitable app metadata and log the response code and msg.
+        Rate limit of this endpoint: 20 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app/get
+        """
+        url = "https://open.feishu.cn/open-apis/bitable/v1/apps/" + cls.app_token
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("获取多维表格元数据,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("获取多维表格元数据异常:{}", e)
+
+    # List the data tables
+    @classmethod
+    def get_tables(cls):
+        """
+        List the data tables of the app and log the response code and msg.
+        Rate limit of this endpoint: 20 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table/list
+        """
+        url = "https://open.feishu.cn/open-apis/bitable/v1/apps/" + cls.app_token + "/tables"
+        params = {
+            "page_token": "",
+            "page_size": ""
+        }
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("列出数据表,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("列出数据表异常:{}", e)
+
+    # List records
+    @classmethod
+    def list_records(cls, count):
+        """
+        List existing records of the data table; at most 100 rows per call, paginated.
+        Prints every returned item, stores the returned page_token on the class and
+        logs the response code and msg.
+        Rate limit of this endpoint: 1000 calls/minute
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/list
+        :param count: page size sent as page_size
+        """
+        url = cls._records_url()
+        params = {
+            "view_id": "",  # View id. Note:
+            # ignored when filter or sort is set.
+            # Example: "vewqhz51lk"
+            "filter": "",  # Filter expression. Notes:
+            # 1. The expression must not exceed 2000 characters.
+            # 2. Filtering on attributes of "person" and "linked" fields (e.g. a person's OpenID) is not supported.
+            # 3. Only the values displayed on the page can be filtered on.
+            # Details: https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/filter
+            # Example: "AND(CurrentValue.[height]>180, CurrentValue.[weight]>150)"
+            "sort": "",  # Sort parameter. Notes:
+            # 1. The expression must not exceed 1000 characters.
+            # 2. Not supported on tables with "formula" or "linked" fields.
+            # Example: ["field1 DESC","field2 ASC"]
+            # Note: join the field name and the sort order inside one quoted string.
+            "field_names": "[]",  # Field names. Example: ["field1"]
+            "text_field_as_array": True,  # Return format of multi-line text fields; true returns an array. Notes:
+            # 1. Hyperlinks inside multi-line text are returned as the link URL.
+            # 2. Currently returned URL types: Bitable links, Feishu doc, Feishu sheet, and @-mention structures.
+            # Example: true
+            # "user_id_type": "",  # User ID type
+            # Example: "open_id"
+            # Allowed values:
+            # open_id: the user's open id
+            # union_id: the user's union id
+            # user_id: the user's user id
+            # Default: open_id
+            "display_formula_ref": "",  # Whether formulas / lookups are returned in full, as-is. Example: true
+            "automatic_fields": "",  # Whether automatically computed fields are returned,
+            # e.g. created_by/created_time/last_modified_by/last_modified_time; true returns them.
+            # Example: true
+            "page_token": "",  # Paging cursor.
+            # Leave empty on the first request to start from the beginning;
+            # when more results exist, the response carries a new page_token
+            # that can be used for the next request.
+            # Example: "recn0hoyXL"
+            # NOTE(review): the cursor stored in cls.page_token is never sent back,
+            # so every call starts from the first page - confirm this is intended.
+            "page_size": count  # Page size. Example: 10. Maximum: 100
+        }
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            result = r.json()
+            # A response without "data"/"page_token" must not hide code/msg behind a KeyError.
+            data = result.get("data") or {}
+            cls.page_token = data.get("page_token", "")
+            for item in data.get("items") or []:
+                print(item)
+            Common.logger().info("列出记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("列出记录异常:{}", e)
+
+    # Retrieve a record
+    @classmethod
+    def search_records(cls, record_id):
+        """
+        Retrieve an existing record by its record_id and log the response code and msg.
+        Rate limit of this endpoint: 20 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/get
+        :param record_id: id of the record, appended to the request URL
+        """
+        url = cls._records_url("/" + record_id)
+        params = {
+            "text_field_as_array": True,  # Return format of multi-line text fields; true returns an array. Example: true
+            # "user_id_type": "",  # User ID type
+            # Example: "open_id"
+            # Allowed values:
+            # open_id: the user's open id
+            # union_id: the user's union id
+            # user_id: the user's user id
+            # Default: open_id
+            "display_formula_ref": True,  # Whether formulas / lookups are returned in full, as-is. Example: true
+            "automatic_fields": True,  # Whether automatically computed fields are returned,
+            # e.g. created_by/created_time/last_modified_by/last_modified_time; true returns them. Example: true
+        }
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("检索记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("检索记录异常:{}", e)
+
+    # Create a record
+    @classmethod
+    def create_record(cls, fields):
+        """
+        Create one record in the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/create
+        :param fields: request payload, sent as-is as the JSON body
+        """
+        url = cls._records_url()
+        body = fields
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("新增记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("新增记录异常:{}", e)
+
+    # Create multiple records
+    @classmethod
+    def create_records(cls, records):
+        """
+        Create multiple records in the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/batch_create
+        :param records: value sent under the "records" key of the JSON body
+        """
+        url = cls._records_url("/batch_create")
+        body = {
+            "records": records
+        }
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("新增多条记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("新增多条记录异常:{}", e)
+
+    # Update a record
+    @classmethod
+    def update_record(cls, record_id, fields):
+        """
+        Update one record of the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/update
+        :param record_id: id of the record, appended to the request URL
+        :param fields: request payload, sent as-is as the JSON body
+        """
+        url = cls._records_url("/" + record_id)
+        body = fields
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.put(url=url, headers=headers, json=body, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("更新记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("更新记录异常:{}", e)
+
+    # Update multiple records
+    @classmethod
+    def update_records(cls, records):
+        """
+        Update multiple records of the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/batch_update
+        :param records: request payload, sent as-is as the JSON body (unlike
+            create_records, it is not wrapped in a "records" key here)
+        """
+        url = cls._records_url("/batch_update")
+        body = records
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("更新多条记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("更新多条记录异常:{}", e)
+
+    # Delete a record
+    @classmethod
+    def del_record(cls, record_id):
+        """
+        Delete one record of the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/delete
+        :param record_id: id of the record, appended to the request URL
+        """
+        url = cls._records_url("/" + record_id)
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.delete(url=url, headers=headers, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("删除记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("删除记录异常:{}", e)
+
+    # Delete multiple records
+    @classmethod
+    def del_records(cls, record_ids):
+        """
+        Delete multiple existing records of the data table and log the response code and msg.
+        Rate limit of this endpoint: 10 QPS
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-record/batch_delete
+        :param record_ids: list of record ids to delete
+        """
+        url = cls._records_url("/batch_delete")
+        body = {
+            "records": record_ids  # Ids of the records to delete. Example: ["recIcJBbvC","recvmiCORa"]
+        }
+        try:
+            headers = cls._auth_headers()
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, json=body, proxies=proxies, verify=False)
+            result = r.json()
+            Common.logger().info("删除多条记录,code:{},msg:{}", result["code"], result["msg"])
+        except Exception as e:
+            Common.logger().error("删除多条记录异常:{}", e)
+
 
 if __name__ == "__main__":
-    feishu = Feishu()
-    print(feishu.find_cell("twitter", "db114c", "956929025645035522"))
-    print(type(feishu.find_cell("twitter", "db114c", "956929025645035522")))
+    # feishu = Feishu()
+    # print(feishu.get_bitable_token())
+
+    # Bare string expressions: they have no runtime effect.
+    # NOTE(review): presumably sample record ids kept for the commented-out calls below - confirm.
+    'reck6nLiZV'
+    'recHcfJZnG'
+    'recxdSMhzE'
+
+    # Instantiate the Bitable wrapper
+    bitable = Bitable()
+
+    # # 获取多维表格元数据
+    # bitable.get_apps()
+    #
+    # # 列出数据表
+    # bitable.get_tables()
+    #
+    # # 列出记录
+    # bitable.list_records(3)
+    #
+    # # 检索记录
+    # bitable.search_records("recHcfJZnG")
+
+    # # 新增一条记录
+    # create_value = {
+    #     "fields": {
+    #         "uid": "0000000000",
+    #         "key_words": "0000000000",
+    #         "name": "功能开发🥕",
+    #         "screen_name": "功能开发🥕",
+    #         "person_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/", "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "description": "功能开发🥕",
+    #         "location": "null",
+    #         "friends_count": 9999999999,
+    #         "followers_count": 9999999999,
+    #         "favourites_count": 9999999999,
+    #         "listed_count": 9999999999,
+    #         "statuses_count": 9999999999,
+    #         "media_count": 9999999999,
+    #         "display_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "created_at": 1656053209000,
+    #         "profile_image_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "profile_banner_url": {
+    #             "link": "null",
+    #             "text": "null"
+    #         },
+    #         "ext_has_nft_avatar": "False",
+    #         "verified": "False",
+    #         "记录创建时间": 1656053209000,
+    #         # "记录修改时间": ""
+    #     }
+    # }
+    # bitable.create_record(create_value)
+
+    # 新增多条记录
+    # create_values = {
+    #     "fields": {
+    #         "uid": "0000000000",
+    #         "key_words": "0000000000",
+    #         "name": "功能开发🥕",
+    #         "screen_name": "功能开发🥕",
+    #         "person_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/", "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "description": "功能开发🥕",
+    #         "location": "null",
+    #         "friends_count": 9999999999,
+    #         "followers_count": 9999999999,
+    #         "favourites_count": 9999999999,
+    #         "listed_count": 9999999999,
+    #         "statuses_count": 9999999999,
+    #         "media_count": 9999999999,
+    #         "display_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "created_at": 1656053209000,
+    #         "profile_image_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "profile_banner_url": {
+    #             "link": "null",
+    #             "text": "null"
+    #         },
+    #         "ext_has_nft_avatar": "False",
+    #         "verified": "False",
+    #         "记录创建时间": 1656053209000,
+    #         # "记录修改时间": ""
+    #     }
+    # }
+    # values_list = [create_values, create_values]
+    # bitable.create_records(values_list)
+
+    # # 更新一条记录
+    # use_record_id = "recxdSMhzE"
+    # use_fields = {
+    #     "fields": {
+    #         "uid": "1111111111",
+    #         "key_words": "1111111111",
+    #         "name": "功能开发🥕",
+    #         "screen_name": "功能开发🥕",
+    #         "person_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/", "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "description": "功能开发🥕",
+    #         "location": "null",
+    #         "friends_count": 9999999999,
+    #         "followers_count": 9999999999,
+    #         "favourites_count": 9999999999,
+    #         "listed_count": 9999999999,
+    #         "statuses_count": 9999999999,
+    #         "media_count": 9999999999,
+    #         "display_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "created_at": 1656053209000,
+    #         "profile_image_url": {
+    #             "link": "https://bytedance.feishu.cn/drive/home/",
+    #             "text": "https://bytedance.feishu.cn/drive/home/"
+    #         },
+    #         "profile_banner_url": {
+    #             "link": "null",
+    #             "text": "null"
+    #         },
+    #         "ext_has_nft_avatar": "False",
+    #         "verified": "False",
+    #         "记录创建时间": 1656053209000,
+    #         # "记录修改时间": ""
+    #     }
+    # }
+    # bitable.update_record(use_record_id, use_fields)
+
+    # # 更新多条记录
+    # "recxdSMhzE"
+    # "recHcfJZnG"
+    # use_records = {
+    #     "records": [
+    #         {
+    #             "record_id": "recxdSMhzE",
+    #             "fields": {
+    #                 "uid": "3333333333",
+    #                 "key_words": "3333333333",
+    #                 "name": "功能开发🥕",
+    #                 "screen_name": "功能开发🥕",
+    #                 "person_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "description": "功能开发🥕",
+    #                 "location": "null",
+    #                 "friends_count": 9999999999,
+    #                 "followers_count": 9999999999,
+    #                 "favourites_count": 9999999999,
+    #                 "listed_count": 9999999999,
+    #                 "statuses_count": 9999999999,
+    #                 "media_count": 9999999999,
+    #                 "display_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "created_at": 1656053209000,
+    #                 "profile_image_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "profile_banner_url": {
+    #                     "link": "null",
+    #                     "text": "null"
+    #                 },
+    #                 "ext_has_nft_avatar": "False",
+    #                 "verified": "False",
+    #                 "记录创建时间": 1656053209000,
+    #                 # "记录修改时间": ""
+    #             }
+    #         },
+    #         {
+    #             "record_id": "recHcfJZnG",
+    #             "fields": {
+    #                 "uid": "3333333333",
+    #                 "key_words": "3333333333",
+    #                 "name": "功能开发🥕",
+    #                 "screen_name": "功能开发🥕",
+    #                 "person_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "description": "功能开发🥕",
+    #                 "location": "null",
+    #                 "friends_count": 9999999999,
+    #                 "followers_count": 9999999999,
+    #                 "favourites_count": 9999999999,
+    #                 "listed_count": 9999999999,
+    #                 "statuses_count": 9999999999,
+    #                 "media_count": 9999999999,
+    #                 "display_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "created_at": 1656053209000,
+    #                 "profile_image_url": {
+    #                     "link": "https://bytedance.feishu.cn/drive/home/",
+    #                     "text": "https://bytedance.feishu.cn/drive/home/"
+    #                 },
+    #                 "profile_banner_url": {
+    #                     "link": "null",
+    #                     "text": "null"
+    #                 },
+    #                 "ext_has_nft_avatar": "False",
+    #                 "verified": "False",
+    #                 "记录创建时间": 1656053209000,
+    #                 # "记录修改时间": ""
+    #             }
+    #         }
+    #     ]
+    # }
+    # bitable.update_records(use_records)
+
+    # # 删除一条记录
+    # bitable.del_record("reck6nLiZV")
 
-    pass
+    # # 删除多条记录
+    # bitable.del_records(['recHcfJZnG', 'recxdSMhzE'])

+ 1 - 7
main/run.py

@@ -13,15 +13,9 @@ from search_by_words import Search
 
 def main_pord():
+    # Runs forever: each pass logs a start message, calls Search.search_users_by_key_words(), then Common.del_logs().
     while True:
+        Common.logger().info("开始抓取 twitter 用户信息")
         Search.search_users_by_key_words()
         Common.del_logs()
-        # main_pord_time = datetime.datetime.now()
-        # while True:
-        #     if main_pord_time.hour == 23 and main_pord_time.minute <= 30:
-        #         CopyUsersInfo.copy_usersinfo()
-        #         break
-        #     else:
-        #         Search.search_users_by_key_words()
 
 
 if __name__ == "__main__":

+ 46 - 5
main/search_by_words.py

@@ -10,7 +10,7 @@ from dateutil import parser
 
 sys.path.append(os.getcwd())
 from common import Common
-from feishu_lib import Feishu
+from feishu_lib import Feishu, Bitable
 
 # proxies = {"http": "127.0.0.1:19180", "https": "127.0.0.1:19180"}
 proxies = {"http": None, "https": None}
@@ -77,7 +77,7 @@ class Search:
                     Common.logger().info("用户:{}信息更新成功", uid)
                     # 再更新 key_word
                     time.sleep(1)
-                    words = user_words[0]+","+key_word
+                    words = user_words[0] + "," + key_word
                     Feishu.update_values("twitter", "db114c", "B" + str(i) + ":" + "B" + str(i),
                                          [[str(words)]])
                     Common.logger().info("用户key_word:{}更新成功", key_word)
@@ -336,7 +336,48 @@ class Search:
                                    str(update_time)]]
                         time.sleep(1)
                         Feishu.update_values("twitter", "db114c", "A2:U2", values)
-                        Common.logger().info("添加成功\n")
+                        Common.logger().info("添加至云文档成功")
+
+                        Common.logger().info("添加用户:{} 至多维表格", name)
+                        fields = {
+                            "fields": {
+                                "uid": str(uid),
+                                "key_words": str(key_word),
+                                "name": str(name),
+                                "screen_name": str(screen_name),
+                                "person_url": {
+                                    "link": str(person_url),
+                                    "text": str(person_url)
+                                },
+                                "description": str(description),
+                                "location": str(location),
+                                "friends_count": int(friends_count),
+                                "followers_count": int(followers_count),
+                                "favourites_count": int(favourites_count),
+                                "listed_count": int(listed_count),
+                                "statuses_count": int(statuses_count),
+                                "media_count": int(media_count),
+                                "display_url": {
+                                    "link": str(display_url),
+                                    "text": str(display_url)
+                                },
+                                "created_at": int(time.mktime(time.strptime(created_at, "%Y/%m/%d %H:%M:%S")))*1000,
+                                "profile_image_url": {
+                                    "link": str(profile_image_url),
+                                    "text": str(profile_image_url)
+                                },
+                                "profile_banner_url": {
+                                    "link": str(profile_banner_url),
+                                    "text": str(profile_banner_url)
+                                },
+                                "ext_has_nft_avatar": str(ext_has_nft_avatar),
+                                "verified": str(verified),
+                                "记录创建时间": int(time.mktime(time.strptime(create_time, "%Y/%m/%d %H:%M:%S"))) * 1000,
+                                # "记录修改时间": ""
+                            }
+                        }
+                        Bitable.create_record(fields)
+                        Common.logger().info("添加至多维表格成功\n")
 
         except Exception as e:
             Common.logger().error("搜索用户异常:{}", e)
@@ -349,10 +390,10 @@ class Search:
             time.sleep(1)
             start = time.time()
             for i in range(400):
-                Common.logger().info("正在请求第{}页", i+1)
+                Common.logger().info("正在请求第{}页", i + 1)
                 cls.search_users_v2(key_word)
             end_time = time.time()
-            Common.logger().info("本次根据{}关键词搜索, 共耗时:{}秒", key_word, int(end_time-start))
+            Common.logger().info("本次根据{}关键词搜索, 共耗时:{}秒", key_word, int(end_time - start))
 
 
 if __name__ == "__main__":

+ 46 - 0
main/sort_lib.py

@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/6/22
+import os
+import time
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+
+
+class Sort:
+    """Inspect the Feishu sheet through an already-running, debuggable Chrome."""
+
+    # created_at descending (for now this method only prints the canvas element text)
+    @classmethod
+    def sort_creat_at(cls):
+        r"""
+        Open the Feishu sheet in the attached Chrome and print canvas element text.
+
+        Prerequisites:
+        1. Kill every running chrome process.
+        2. Start Chrome from a terminal with:
+           Google\ Chrome -remote-debugging-port=9222
+        """
+        options = webdriver.ChromeOptions()
+        # Attach to the Chrome listening on the debugging port instead of launching one.
+        options.debugger_address = "127.0.0.1:9222"
+        driver = webdriver.Chrome(options=options)
+        try:
+            print("打开飞书文档")
+            driver.get("https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh")
+
+            print("等待 2s")
+            time.sleep(2)
+
+            created_at_list = driver.find_elements(By.CLASS_NAME, "faster-single-canvas")
+            for created_at in created_at_list:
+                print(f"created_at.text:{created_at.text}")
+        finally:
+            # Always end the driver session, even if page load or element lookup fails.
+            print("退出Chrome")
+            driver.quit()
+
+if __name__ == "__main__":
+    # sort_creat_at is a classmethod, so it is invoked on the class directly.
+    Sort.sort_creat_at()