# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/23
import os
import sys
import time
import requests
from datetime import date, timedelta
from dateutil import parser
sys.path.append(os.getcwd())
from common import Common
from feishu_lib import Feishu

# proxies = {"http": "127.0.0.1:19180", "https": "127.0.0.1:19180"}
proxies = {"http": None, "https": None}


class Search:
    # The day before yesterday, e.g. 2022-04-12
    before_yesterday = (date.today() + timedelta(days=-2)).strftime("%Y-%m-%d")
    # Yesterday, e.g. 2022-04-13
    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
    # Today, e.g. 2022-04-14
    today = date.today()
    cursor = ""

    # Build the keyword list
    @classmethod
    def search_words(cls):
        word_list = []
        # Read all keywords from the cloud sheet and add them to the list
        time.sleep(1)
        lists = Feishu.get_values_batch("twitter", "PZGpSZ")
        for i in lists:
            for j in i:
                # Skip empty cells and cells commented out with "#"
                if j is not None and "#" not in j:
                    word_list.append(j)
        return word_list

    # Update an existing user's info
    @classmethod
    def update_user_info(cls, uid, key_word, values):
        try:
            if len(Feishu.get_values_batch("twitter", "db114c")) == 1:
                Common.logger().info("No user info in the sheet")
            else:
                time.sleep(1)
                i = Feishu.find_cell("twitter", "db114c", uid)
                user_words = Feishu.get_range_value("twitter", "db114c", "B" + str(i) + ":" + "B" + str(i))
                user_create_time = Feishu.get_range_value("twitter", "db114c", "T" + str(i) + ":" + "T" + str(i))[0]
                user_update_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time()))
                # user_words holds a single cell: a comma-separated keyword string
                if key_word in user_words[0]:
                    Common.logger().info("key_word already recorded; updating user info: {}", uid)
                    time.sleep(1)
                    values.append(user_create_time)
                    values.append(user_update_time)
                    Common.logger().info("values:{}", values)
                    Feishu.update_values("twitter", "db114c", "C" + str(i) + ":" + "U" + str(i), [values])
                    Common.logger().info("User {} info updated", uid)
                    return
                else:
                    Common.logger().info("key_word not recorded yet; updating user info: {}", uid)
                    # First update everything except key_word
                    time.sleep(1)
                    values.append(user_create_time)
                    values.append(user_update_time)
                    Common.logger().info("values:{}", values)
                    Feishu.update_values("twitter", "db114c", "C" + str(i) + ":" + "U" + str(i), [values])
                    Common.logger().info("User {} info updated", uid)
                    # Then append the new key_word
                    time.sleep(1)
                    words = user_words[0] + "," + key_word
                    Feishu.update_values("twitter", "db114c", "B" + str(i) + ":" + "B" + str(i), [[str(words)]])
                    Common.logger().info("User key_word {} updated", key_word)
                    return
        except Exception as e:
            Common.logger().error("Exception while updating user info: {}", e)

    # Search users by keyword
    @classmethod
    def search_users_v2(cls, key_word):
        try:
            cursor_params = ''
            if len(cls.cursor) > 0:
                cursor_params = '&cursor={}'.format(cls.cursor)
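            # Pagination note: each response carries a "cursor" value in the
            # last entry of its timeline instructions (extracted below into
            # cls.cursor); passing it back as &cursor=... fetches the next
            # page, and an empty cls.cursor requests the first page for a
            # fresh keyword (it is reset in search_users_by_key_words).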
\ # "include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&" \ # "include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&" \ # "include_can_media_tag=1&include_ext_has_nft_avatar=1&skip_status=1&" \ # "cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&" \ # "include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&" \ # "include_ext_media_color=true&include_ext_media_availability=true&" \ # "include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&" \ # "send_error_codes=true&simple_quoted_tweet=true&" \ # "q=(" + key_word + ")%20until%3A" + str(cls.today) + "%20since%3A" + str(cls.before_yesterday) + \ # "&result_filter=user&count=20&query_source=typed_query" + cursor_params + \ # "&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2" \ # "Cenrichments%2CsuperFollowMetadata%2CunmentionInfo" url = "https://twitter.com/i/api/2/search/adaptive.json?" \ "include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&" \ "include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&" \ "include_can_media_tag=1&include_ext_has_nft_avatar=1&skip_status=1&" \ "cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&" \ "include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&" \ "include_ext_media_color=true&include_ext_media_availability=true&" \ "include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&" \ "send_error_codes=true&simple_quoted_tweet=true&" \ "q=" + key_word + \ "&result_filter=user&count=20&query_source=typed_query" + cursor_params + \ "&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2" \ "Cenrichments%2CsuperFollowMetadata%2CunmentionInfo" headers = { 'authority': 'twitter.com', 'accept': '*/*', 'accept-language': 'zh-CN,zh;q=0.9', 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz' '4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 'cookie': 'guest_id_marketing=v1%3A164691584304284451; guest_id_ads=v1%3A164691584304284451;' ' kdt=RGGgmMi1qsAE8ap8NlKFjpksuDXG9gdD1utIeK0u; des_opt_in=Y; _gcl_au=1.1.1066' '77612.1647418528;' ' g_state={"i_l":0}; _gid=GA1.2.645428048.1652699425;' ' personalization_id="v1_zSZMfoG7rsTlMHQYwOA39Q=="; guest_id=v1%3A165294843395764407;' ' auth_token=592dbe3e68ce355f31f8343d700215030fbcd817;' ' ct0=df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f7f39a447' 'b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c1056545ab007de55be;' ' twid=u%3D1501900092303101953; external_referer=padhuUp37zg6GVaBnLSoCA0layDKYA' 'Tn|0|8e8t2xd8A2w%3D; mbox=PC#3ffa21b420af400ca9e94d2b1b72525c.32_0#1716385856|s' 'ession#047c8af8f5e34fa585b247e05c6f0a6b#1653142916; _ga=GA1.2.659870250.1646915849;' ' _ga_BYKEBDM7DS=GS1.1.1653201242.12.0.1653201242.0; _ga_34PHSZMC42=GS1.1.1653201242.5' '8.0.1653201242.0; lang=zh-cn; _twitter_sess=BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6R' 'mxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCMQBs%252BqAAToMY3NyZl9p%250AZC' 'IlYjJkNWIyOTZiMzhmMGVlNWM1NDY0MmUyNDM5NTJkNjg6B2lkIiVkZjNl%250AMWNkNTY5OTUwNDdiYzgzNDE1NG' 'UyNjA3ZWU1NA%253D%253D--b3450fa2f7a9503c9e5e8356aff22570d29a7912; guest_id=v1%3A16479480474' '0239293; guest_id_ads=v1%3A164794804740239293; guest_id_marketing=v1%3A164794804740239293;' ' 
personalization_id="v1_/1LnzKXLyeYnZl13Ri62bg=="', # 搜索最近三天的 # 'referer': "https://twitter.com/search?q=(" + key_word + ")%20until%3A" + str(cls.today) + # "%20since%3A" + str(cls.before_yesterday) + "&src=typed_query&f=user", 'referer': "https://twitter.com/search?q=" + key_word + "&src=typed_query&f=user", 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-origin', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)' ' Chrome/101.0.4951.64 Safari/537.36', 'x-csrf-token': 'df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f' '7f39a447b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c10565' '45ab007de55be', 'x-twitter-active-user': 'yes', 'x-twitter-auth-type': 'OAuth2Session', 'x-twitter-client-language': 'zh-cn' } r = requests.get(url=url, headers=headers, proxies=proxies) # Common.logger().info("response:{}", r.text) cls.cursor = r.json()["timeline"]["instructions"][-1]["addEntries"][ "entries"][-1]["content"]["operation"]["cursor"]["value"] # Common.logger().info("cursor:{}", cls.cursor) users = r.json()["globalObjects"]["users"] if len(users) == 0: Common.logger().info("本次请求无数据返回") return else: userid_list = [] for userid in users: userid_list.append(userid) for userinfo in userid_list: userinfo = users[userinfo] if "id_str" in userinfo: uid = userinfo["id_str"] else: uid = "null" if "name" in userinfo: name = userinfo["name"] else: name = "null" if "screen_name" in userinfo: screen_name = userinfo["screen_name"] else: screen_name = "null" if screen_name == "null": person_url = "null" else: person_url = "https://twitter.com/" + screen_name if "description" in userinfo: description = userinfo["description"] else: description = "null" if "location" in userinfo: location = userinfo["location"] else: location = "null" if "friends_count" in userinfo: friends_count = userinfo["friends_count"] else: friends_count = "null" if "followers_count" in userinfo: followers_count = userinfo["followers_count"] else: followers_count = "null" if "favourites_count" in userinfo: favourites_count = userinfo["favourites_count"] else: favourites_count = "null" if "listed_count" in userinfo: listed_count = userinfo["listed_count"] else: listed_count = "null" if "statuses_count" in userinfo: statuses_count = userinfo["statuses_count"] else: statuses_count = "null" if "media_count" in userinfo: media_count = userinfo["media_count"] else: media_count = "null" if "entities" not in userinfo: display_url = "null" elif "url" not in userinfo["entities"]: display_url = "null" elif "display_url" in userinfo["entities"]["url"]["urls"][0]: display_url = userinfo["entities"]["url"]["urls"][0]["display_url"] elif "expanded_url" in userinfo["entities"]["url"]["urls"][0]: display_url = userinfo["entities"]["url"]["urls"][0]["expanded_url"] elif "url" in userinfo["entities"]["url"]["urls"][0]: display_url = userinfo["entities"]["url"]["urls"][0]["url"] else: display_url = "null" if "created_at" in userinfo: created_at1 = userinfo["created_at"] created_at = str(parser.parse(created_at1).strftime("%Y/%m/%d %H:%M:%S")) else: created_at = "null" if "profile_image_url" in userinfo: profile_image_url = userinfo["profile_image_url"] else: profile_image_url = "null" if "profile_banner_url" in userinfo: profile_banner_url = userinfo["profile_banner_url"] else: profile_banner_url = "null" 
if "ext_has_nft_avatar" in userinfo: ext_has_nft_avatar = userinfo["ext_has_nft_avatar"] else: ext_has_nft_avatar = "null" if "verified" in userinfo: verified = userinfo["verified"] else: verified = "null" # 过滤无效用户 if uid == "" or uid == "null": Common.logger().info("无效用户") # 用户已存在云文档中 elif uid in [j for i in Feishu.get_values_batch("twitter", "db114c") for j in i]: Common.logger().info("用户已存在:{}", uid) pass # time.sleep(1) # values = [str(name), # str(screen_name), # str(person_url), # str(description), # str(location), # int(friends_count), # int(followers_count), # int(favourites_count), # int(listed_count), # int(statuses_count), # int(media_count), # str(display_url), # str(created_at), # str(profile_image_url), # str(profile_banner_url), # str(ext_has_nft_avatar), # str(verified)] # cls.update_user_info(uid, key_word, values) # 用户未存在云文档中 else: Common.logger().info("添加用户:{} 至云文档", name) create_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time())) update_time = "" # 云文档插入行:https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c Feishu.insert_columns("twitter", "db114c", "ROWS", 1, 2) # 云文档写入数据:https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c values = [[str(uid), str(key_word), str(name), str(screen_name), str(person_url), str(description), str(location), int(friends_count), int(followers_count), int(favourites_count), int(listed_count), int(statuses_count), int(media_count), str(display_url), str(created_at), str(profile_image_url), str(profile_banner_url), str(ext_has_nft_avatar), str(verified), str(create_time), str(update_time)]] time.sleep(1) Feishu.update_values("twitter", "db114c", "A2:U2", values) Common.logger().info("添加成功\n") except Exception as e: Common.logger().error("搜索用户异常:{}", e) @classmethod def search_users_by_key_words(cls): for key_word in cls.search_words(): Common.logger().info("根据关键词:{} 搜索用户", key_word) cls.cursor = '' time.sleep(1) start = time.time() for i in range(200): Common.logger().info("正在请求第{}页", i+1) cls.search_users_v2(key_word) end_time = time.time() Common.logger().info("本次根据{}关键词搜索, 共耗时:{}秒", key_word, int(end_time-start)) if __name__ == "__main__": search = Search() # search.search_users("web3") search.search_users_by_key_words()